use std::path::PathBuf;
use pyo3::exceptions::{PyIOError, PyNotImplementedError, PyRuntimeError, PyValueError};
use pyo3::prelude::*;
#[cfg(feature = "python")]
use pyo3::types::PyBytes;
#[cfg(feature = "python")]
#[cfg(any(not(feature = "office"), not(feature = "ocr")))]
use pyo3::types::{PyDict, PyTuple};
use crate::api::PdfBuilder as RustPdfBuilder;
#[cfg(feature = "python")]
use crate::converters::ConversionOptions as RustConversionOptions;
use crate::document::PdfDocument as RustPdfDocument;
use crate::extractors::forms::{
field_flags, FieldType as RustFieldType, FieldValue as RustFieldValue,
FormField as RustFormField,
};
use crate::layout::{Color as RustColor, TextChar as RustTextChar};
use crate::writer::{BlendMode as RustBlendMode, LineCap as RustLineCap, LineJoin as RustLineJoin};
/// Python-facing wrapper around a PDF document, exposed to Python as `PdfDocument`.
///
/// Read/extraction calls go through `inner`; editing calls go through `editor`,
/// which `ensure_editor` creates on demand from `path` or `raw_bytes`.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "PdfDocument")]
pub struct PyPdfDocument {
/// Underlying document used by the extraction and conversion methods.
pub(crate) inner: RustPdfDocument,
/// Source path recorded by the path-based constructor; `None` when built from bytes.
pub(crate) path: Option<String>,
/// Copy of the source bytes kept by `from_bytes`; `None` when opened from a path.
pub(crate) raw_bytes: Option<Vec<u8>>,
/// Editor instance, `None` until `ensure_editor` has run successfully.
pub(crate) editor: Option<crate::editor::DocumentEditor>,
}
impl PyPdfDocument {
fn ensure_editor(&mut self) -> PyResult<()> {
if self.editor.is_none() {
let editor = if let Some(ref path) = self.path {
crate::editor::DocumentEditor::open(path)
} else if let Some(ref bytes) = self.raw_bytes {
crate::editor::DocumentEditor::from_bytes(bytes.clone())
} else {
return Err(PyRuntimeError::new_err("No document source available"));
};
self.editor =
Some(editor.map_err(|e| {
PyRuntimeError::new_err(format!("Failed to open editor: {}", e))
})?);
}
Ok(())
}
}
#[pymethods]
impl PyPdfDocument {
/// Open the PDF at `path`, optionally authenticating with `password`.
///
/// Raises `IOError` when the file cannot be opened and `RuntimeError` when the
/// authentication call itself returns an error. The editor is not created here.
#[new]
#[pyo3(signature = (path, password=None))]
#[allow(unused_mut)]
fn new(path: PathBuf, password: Option<&str>) -> PyResult<Self> {
    let mut document = RustPdfDocument::open(&path)
        .map_err(|e| PyIOError::new_err(format!("Failed to open PDF: {}", e)))?;

    if let Some(secret) = password {
        // Only an `Err` aborts construction; the `Ok` value of `authenticate`
        // is discarded here.
        document
            .authenticate(secret.as_bytes())
            .map_err(|e| PyRuntimeError::new_err(format!("Authentication failed: {}", e)))?;
    }

    Ok(PyPdfDocument {
        inner: document,
        // Stored as a (lossily converted) string so the editor can be opened later.
        path: Some(path.to_string_lossy().into_owned()),
        raw_bytes: None,
        editor: None,
    })
}
/// Build a document from an in-memory `bytes` object, optionally authenticating
/// with `password`.
///
/// The bytes are copied once for the parsed document and kept a second time in
/// `raw_bytes` so an editor can be created later. Raises `IOError` when parsing
/// fails and `RuntimeError` when the authentication call returns an error.
#[staticmethod]
#[pyo3(signature = (data, password=None))]
#[allow(unused_mut)]
fn from_bytes(data: &Bound<'_, PyBytes>, password: Option<&str>) -> PyResult<Self> {
    let raw = data.as_bytes().to_vec();
    let mut document = RustPdfDocument::from_bytes(raw.clone())
        .map_err(|e| PyIOError::new_err(format!("Failed to open PDF from bytes: {}", e)))?;

    if let Some(secret) = password {
        // Only an `Err` aborts construction; the `Ok` value is discarded.
        document
            .authenticate(secret.as_bytes())
            .map_err(|e| PyRuntimeError::new_err(format!("Authentication failed: {}", e)))?;
    }

    Ok(PyPdfDocument {
        inner: document,
        path: None,
        raw_bytes: Some(raw),
        editor: None,
    })
}
/// Context-manager entry: returns the document itself so it can be bound by
/// `with PdfDocument(...) as doc:`.
fn __enter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
slf
}
/// Context-manager exit. The exception arguments are ignored and no cleanup is
/// performed here.
///
/// Always returns `False`, so an exception raised inside the `with` block is
/// not suppressed.
fn __exit__(
&mut self,
_exc_type: Option<&Bound<'_, PyAny>>,
_exc_val: Option<&Bound<'_, PyAny>>,
_exc_tb: Option<&Bound<'_, PyAny>>,
) -> PyResult<bool> {
Ok(false)
}
/// Return the version pair reported by the underlying document.
fn version(&self) -> (u8, u8) {
self.inner.version()
}
/// Authenticate the document with `password` (passed on as raw bytes).
///
/// Returns the boolean produced by the underlying document; raises
/// `RuntimeError` when the underlying call fails.
fn authenticate(&mut self, password: &str) -> PyResult<bool> {
    let outcome = self.inner.authenticate(password.as_bytes());
    outcome.map_err(|e| PyRuntimeError::new_err(format!("Authentication failed: {}", e)))
}
/// Return the number of pages reported by the underlying document.
///
/// Raises `RuntimeError` when the count cannot be determined.
fn page_count(&mut self) -> PyResult<usize> {
    match self.inner.page_count() {
        Ok(count) => Ok(count),
        Err(e) => Err(PyRuntimeError::new_err(format!("Failed to get page count: {}", e))),
    }
}
/// Enumerate the document's signatures, wrapping each entry in a `PySignature`.
///
/// Raises `RuntimeError` when enumeration fails.
fn signatures(&mut self) -> PyResult<Vec<PySignature>> {
    let infos = crate::signatures::enumerate_signatures(&mut self.inner).map_err(|e| {
        PyRuntimeError::new_err(format!("Failed to enumerate signatures: {}", e))
    })?;

    let mut wrapped = Vec::new();
    wrapped.extend(infos.into_iter().map(|info| PySignature { info }));
    Ok(wrapped)
}
/// Return the number of signatures reported for the document.
///
/// Raises `RuntimeError` when counting fails.
fn signature_count(&mut self) -> PyResult<usize> {
    let counted = crate::signatures::count_signatures(&mut self.inner);
    counted.map_err(|e| PyRuntimeError::new_err(format!("Failed to count signatures: {}", e)))
}
/// Extract the text of `page`, optionally restricted to `region`.
///
/// `region` is an `(x, y, w, h)` tuple used to build a `Rect`; content is
/// selected with `RectFilterMode::Intersects`. Raises `RuntimeError` on failure.
#[pyo3(signature = (page, region=None))]
fn extract_text(
    &mut self,
    page: usize,
    region: Option<(f32, f32, f32, f32)>,
) -> PyResult<String> {
    match region {
        Some((x, y, w, h)) => {
            let bounds = crate::geometry::Rect::new(x, y, w, h);
            self.inner
                .extract_text_in_rect(page, bounds, crate::layout::RectFilterMode::Intersects)
                .map_err(|e| {
                    PyRuntimeError::new_err(format!("Failed to extract text in region: {}", e))
                })
        },
        None => self
            .inner
            .extract_text(page)
            .map_err(|e| PyRuntimeError::new_err(format!("Failed to extract text: {}", e))),
    }
}
/// Ask the underlying document to remove headers, then mirror its erase
/// regions into the editor if one exists.
///
/// `threshold` is forwarded unchanged (Python default `0.8`). Returns the count
/// reported by the underlying call; raises `RuntimeError` when it fails.
#[pyo3(signature = (threshold=0.8))]
fn remove_headers(&mut self, threshold: f32) -> PyResult<usize> {
    let removed = self
        .inner
        .remove_headers(threshold)
        .map_err(|e| PyRuntimeError::new_err(format!("Header removal failed: {}", e)))?;
    self.sync_editor_erasures().map(|()| removed)
}
/// Ask the underlying document to remove footers, then mirror its erase
/// regions into the editor if one exists.
///
/// `threshold` is forwarded unchanged (Python default `0.8`). Returns the count
/// reported by the underlying call; raises `RuntimeError` when it fails.
#[pyo3(signature = (threshold=0.8))]
fn remove_footers(&mut self, threshold: f32) -> PyResult<usize> {
    let removed = self
        .inner
        .remove_footers(threshold)
        .map_err(|e| PyRuntimeError::new_err(format!("Footer removal failed: {}", e)))?;
    self.sync_editor_erasures().map(|()| removed)
}
/// Ask the underlying document to remove artifacts, then mirror its erase
/// regions into the editor if one exists.
///
/// `threshold` is forwarded unchanged (Python default `0.8`). Returns the count
/// reported by the underlying call; raises `RuntimeError` when it fails.
#[pyo3(signature = (threshold=0.8))]
fn remove_artifacts(&mut self, threshold: f32) -> PyResult<usize> {
    let removed = self
        .inner
        .remove_artifacts(threshold)
        .map_err(|e| PyRuntimeError::new_err(format!("Artifact removal failed: {}", e)))?;
    self.sync_editor_erasures().map(|()| removed)
}
fn sync_editor_erasures(&mut self) -> PyResult<()> {
if let Some(ref mut editor) = self.editor {
for (page, regions) in self.inner.erase_regions.iter() {
editor.clear_erase_regions(*page);
for rect in regions {
let _ = editor.erase_region(
*page,
[rect.x, rect.y, rect.x + rect.width, rect.y + rect.height],
);
}
}
}
Ok(())
}
/// Erase the header on `page` and mirror the erase regions into the editor.
///
/// The editor is created first if needed. Raises `RuntimeError` when the editor
/// cannot be created or the underlying erase call fails.
fn erase_header(&mut self, page: usize) -> PyResult<()> {
    self.ensure_editor()?;
    if let Err(e) = self.inner.erase_header(page) {
        return Err(PyRuntimeError::new_err(format!("Failed to erase header: {}", e)));
    }
    self.sync_editor_erasures()
}
/// Alias for `erase_header`: forwards `page` and returns its result unchanged.
fn edit_header(&mut self, page: usize) -> PyResult<()> {
self.erase_header(page)
}
/// Erase the footer on `page` and mirror the erase regions into the editor.
///
/// The editor is created first if needed. Raises `RuntimeError` when the editor
/// cannot be created or the underlying erase call fails.
fn erase_footer(&mut self, page: usize) -> PyResult<()> {
    self.ensure_editor()?;
    if let Err(e) = self.inner.erase_footer(page) {
        return Err(PyRuntimeError::new_err(format!("Failed to erase footer: {}", e)));
    }
    self.sync_editor_erasures()
}
/// Alias for `erase_footer`: forwards `page` and returns its result unchanged.
fn edit_footer(&mut self, page: usize) -> PyResult<()> {
self.erase_footer(page)
}
/// Erase the artifacts on `page` and mirror the erase regions into the editor.
///
/// Raises `RuntimeError` when the underlying erase call fails.
fn erase_artifacts(&mut self, page: usize) -> PyResult<()> {
    self.inner
        .erase_artifacts(page)
        .map_err(|e| PyRuntimeError::new_err(format!("Failed to erase artifacts: {}", e)))?;
    // Keep an already-open editor in step with the document, as `erase_header`,
    // `erase_footer` and the `remove_*` methods do. This is a no-op when no
    // editor exists, so it cannot introduce a new failure.
    self.sync_editor_erasures()
}
/// Create a `PyPdfPageRegion` bound to this document, `page` and `bbox`.
///
/// The four `bbox` values are passed to `Rect::new` in order. No validation is
/// performed here, so this always returns `Ok`.
fn within(slf: Py<Self>, page: usize, bbox: (f32, f32, f32, f32)) -> PyResult<PyPdfPageRegion> {
    let (x, y, w, h) = bbox;
    let region = crate::geometry::Rect::new(x, y, w, h);
    Ok(PyPdfPageRegion {
        doc: slf,
        page_index: page,
        region,
    })
}
/// Render `page` to encoded image bytes.
///
/// * `dpi` — defaults to 72 when omitted.
/// * `format` — `"png"`, `"jpeg"` or `"jpg"` (case-insensitive); omitting it or
///   passing `"png"` leaves the render options' output format untouched.
/// * `background` — `(r, g, b, a)` stored as the options' background.
/// * `transparent` — when true, clears the background and overrides `background`.
/// * `render_annotations` — overrides the options' flag when provided.
/// * `jpeg_quality` — 1 to 100, defaults to 85; validated even for PNG output.
///
/// Raises `ValueError` for an invalid `jpeg_quality` or `format`, and
/// `RuntimeError` when rendering fails or the `rendering` feature is disabled.
#[pyo3(signature = (
    page,
    dpi=None,
    format=None,
    background=None,
    transparent=false,
    render_annotations=None,
    jpeg_quality=None,
))]
#[allow(clippy::too_many_arguments)]
fn render_page(
    &mut self,
    page: usize,
    dpi: Option<u32>,
    format: Option<&str>,
    background: Option<(f32, f32, f32, f32)>,
    transparent: bool,
    render_annotations: Option<bool>,
    jpeg_quality: Option<u8>,
) -> PyResult<Vec<u8>> {
    #[cfg(feature = "rendering")]
    {
        use pyo3::exceptions::PyValueError;

        let quality = match jpeg_quality {
            None => 85,
            Some(q) if (1..=100).contains(&q) => q,
            Some(q) => {
                return Err(PyValueError::new_err(format!(
                    "jpeg_quality must be 1-100, got {q}",
                )));
            },
        };

        let mut options = crate::rendering::RenderOptions::with_dpi(dpi.unwrap_or(72));

        if let Some(fmt) = format {
            let normalized = fmt.to_lowercase();
            if matches!(normalized.as_str(), "jpeg" | "jpg") {
                options = options.as_jpeg(quality);
            } else if normalized != "png" {
                return Err(PyValueError::new_err(format!(
                    "format must be 'png' or 'jpeg', got {fmt:?}",
                )));
            }
        }

        // `transparent` wins over an explicit background colour.
        if transparent {
            options.background = None;
        } else if let Some((r, g, b, a)) = background {
            options.background = Some([r, g, b, a]);
        }

        if let Some(flag) = render_annotations {
            options.render_annotations = flag;
        }

        let rendered = crate::rendering::render_page(&mut self.inner, page, &options);
        rendered
            .map(|img| img.data)
            .map_err(|e| PyRuntimeError::new_err(format!("Failed to render page: {e}")))
    }
    #[cfg(not(feature = "rendering"))]
    {
        // Touch every argument so the feature-less build has no unused parameters.
        let _ = (page, dpi, format, background, transparent, render_annotations, jpeg_quality);
        Err(PyRuntimeError::new_err("Rendering feature not enabled."))
    }
}
/// Extract the characters of `page`, optionally restricted to `region`.
///
/// `region` is an `(x, y, w, h)` tuple used to build a `Rect`; content is
/// selected with `RectFilterMode::Intersects`. Each result is wrapped in a
/// `PyTextChar`. Raises `RuntimeError` on failure.
#[pyo3(signature = (page, region=None))]
fn extract_chars(
    &mut self,
    page: usize,
    region: Option<(f32, f32, f32, f32)>,
) -> PyResult<Vec<PyTextChar>> {
    let extracted = match region {
        Some((x, y, w, h)) => self.inner.extract_chars_in_rect(
            page,
            crate::geometry::Rect::new(x, y, w, h),
            crate::layout::RectFilterMode::Intersects,
        ),
        None => self.inner.extract_chars(page),
    };

    let chars = extracted
        .map_err(|e| PyRuntimeError::new_err(format!("Failed to extract characters: {}", e)))?;
    Ok(chars
        .into_iter()
        .map(|ch| PyTextChar { inner: ch })
        .collect())
}
/// Extract the words of `page`, optionally filtered to `region`.
///
/// `word_gap_threshold` and the inner value of `profile` are forwarded to the
/// underlying extraction. When `region` (`(x, y, w, h)`) is given, the extracted
/// words are filtered afterwards with `RectFilterMode::Intersects`.
/// Raises `RuntimeError` when extraction fails.
#[pyo3(signature = (page, region=None, word_gap_threshold=None, profile=None))]
fn extract_words(
    &mut self,
    page: usize,
    region: Option<(f32, f32, f32, f32)>,
    word_gap_threshold: Option<f32>,
    profile: Option<PyExtractionProfile>,
) -> PyResult<Vec<PyWord>> {
    use crate::layout::{RectFilterMode, SpatialCollectionFiltering};

    let inner_profile = profile.map(|p| p.inner);
    let mut words = self
        .inner
        .extract_words_with_thresholds(page, word_gap_threshold, inner_profile)
        .map_err(|e| PyRuntimeError::new_err(format!("Failed to extract words: {}", e)))?;

    if let Some((x, y, w, h)) = region {
        let bounds = crate::geometry::Rect::new(x, y, w, h);
        words = words.filter_by_rect(&bounds, RectFilterMode::Intersects);
    }

    Ok(words
        .into_iter()
        .map(|word| PyWord { inner: word })
        .collect())
}
/// Extract the text lines of `page`, optionally filtered to `region`.
///
/// `word_gap_threshold`, `line_gap_threshold` and the inner value of `profile`
/// are forwarded to the underlying extraction. When `region` (`(x, y, w, h)`)
/// is given, the extracted lines are filtered afterwards with
/// `RectFilterMode::Intersects`. Raises `RuntimeError` when extraction fails.
#[pyo3(signature = (page, region=None, word_gap_threshold=None, line_gap_threshold=None, profile=None))]
fn extract_text_lines(
    &mut self,
    page: usize,
    region: Option<(f32, f32, f32, f32)>,
    word_gap_threshold: Option<f32>,
    line_gap_threshold: Option<f32>,
    profile: Option<PyExtractionProfile>,
) -> PyResult<Vec<PyTextLine>> {
    use crate::layout::{RectFilterMode, SpatialCollectionFiltering};

    let inner_profile = profile.map(|p| p.inner);
    let mut lines = self
        .inner
        .extract_text_lines_with_thresholds(
            page,
            word_gap_threshold,
            line_gap_threshold,
            inner_profile,
        )
        .map_err(|e| PyRuntimeError::new_err(format!("Failed to extract lines: {}", e)))?;

    if let Some((x, y, w, h)) = region {
        let bounds = crate::geometry::Rect::new(x, y, w, h);
        lines = lines.filter_by_rect(&bounds, RectFilterMode::Intersects);
    }

    Ok(lines
        .into_iter()
        .map(|line| PyTextLine { inner: line })
        .collect())
}
/// Analyse the layout of `page` and return the derived parameters.
///
/// Spans are extracted, expanded into characters and fed to
/// `DocumentProperties::analyze`; the adaptive thresholds come from
/// `AdaptiveLayoutParams::from_properties`. If the page's media box cannot be
/// obtained, `(0.0, 0.0, 612.0, 792.0)` is used instead.
/// Raises `RuntimeError` when span extraction or analysis fails.
fn page_layout_params(&mut self, page: usize) -> PyResult<PyLayoutParams> {
    use crate::layout::{AdaptiveLayoutParams, DocumentProperties};

    let spans = self
        .inner
        .extract_spans(page)
        .map_err(|e| PyRuntimeError::new_err(format!("Failed to extract spans: {}", e)))?;

    let media_box = self
        .inner
        .get_page_media_box(page)
        .unwrap_or((0.0, 0.0, 612.0, 792.0));
    // NOTE(review): the four media-box values go straight into `Rect::new`, which
    // other call sites in this file use as (x, y, width, height) — confirm the
    // media box uses the same layout.
    let page_bbox =
        crate::geometry::Rect::new(media_box.0, media_box.1, media_box.2, media_box.3);

    let mut all_chars = Vec::new();
    for span in spans.iter() {
        all_chars.extend(span.to_chars());
    }

    let props = DocumentProperties::analyze(&all_chars, page_bbox)
        .map_err(|e| PyRuntimeError::new_err(format!("Layout analysis failed: {}", e)))?;
    let adaptive = AdaptiveLayoutParams::from_properties(&props);

    Ok(PyLayoutParams {
        word_gap_threshold: adaptive.word_gap_threshold,
        line_gap_threshold: adaptive.line_gap_threshold,
        median_char_width: props.median_char_width,
        median_font_size: props.median_font_size,
        median_line_spacing: props.median_line_spacing,
        column_count: props.column_count,
    })
}
/// Return `True` only when the structure-tree lookup succeeds and yields a tree.
///
/// A lookup error is reported as `False` rather than raised.
fn has_structure_tree(&mut self) -> bool {
    matches!(self.inner.structure_tree(), Ok(Some(_)))
}
/// Convert `page` to plain text.
///
/// The keyword arguments populate a `ConversionOptions`; `extract_tables` is
/// always enabled and every other option keeps its default.
/// Raises `RuntimeError` when the conversion fails.
#[pyo3(signature = (page, preserve_layout=false, detect_headings=true, include_images=false, image_output_dir=None))]
fn to_plain_text(
    &mut self,
    page: usize,
    preserve_layout: bool,
    detect_headings: bool,
    include_images: bool,
    image_output_dir: Option<String>,
) -> PyResult<String> {
    let options = RustConversionOptions {
        extract_tables: true,
        preserve_layout,
        detect_headings,
        include_images,
        image_output_dir,
        ..Default::default()
    };
    let converted = self.inner.to_plain_text(page, &options);
    converted
        .map_err(|e| PyRuntimeError::new_err(format!("Failed to convert to plain text: {}", e)))
}
/// Convert every page to plain text.
///
/// The keyword arguments populate a `ConversionOptions`; `extract_tables` is
/// always enabled and every other option keeps its default.
/// Raises `RuntimeError` when the conversion fails.
#[pyo3(signature = (preserve_layout=false, detect_headings=true, include_images=false, image_output_dir=None))]
fn to_plain_text_all(
    &mut self,
    preserve_layout: bool,
    detect_headings: bool,
    include_images: bool,
    image_output_dir: Option<String>,
) -> PyResult<String> {
    let options = RustConversionOptions {
        extract_tables: true,
        preserve_layout,
        detect_headings,
        include_images,
        image_output_dir,
        ..Default::default()
    };
    let converted = self.inner.to_plain_text_all(&options);
    converted.map_err(|e| {
        PyRuntimeError::new_err(format!("Failed to convert all pages to plain text: {}", e))
    })
}
/// Convert `page` to Markdown.
///
/// The keyword arguments populate a `ConversionOptions`; `extract_tables` is
/// always enabled and every other option keeps its default.
/// Raises `RuntimeError` when the conversion fails.
#[pyo3(signature = (page, preserve_layout=false, detect_headings=true, include_images=false, image_output_dir=None, embed_images=true, include_form_fields=true))]
fn to_markdown(
    &mut self,
    page: usize,
    preserve_layout: bool,
    detect_headings: bool,
    include_images: bool,
    image_output_dir: Option<String>,
    embed_images: bool,
    include_form_fields: bool,
) -> PyResult<String> {
    let options = RustConversionOptions {
        extract_tables: true,
        preserve_layout,
        detect_headings,
        include_images,
        image_output_dir,
        embed_images,
        include_form_fields,
        ..Default::default()
    };
    let converted = self.inner.to_markdown(page, &options);
    converted.map_err(|e| PyRuntimeError::new_err(format!("Failed to convert to Markdown: {}", e)))
}
/// Convert `page` to HTML.
///
/// The keyword arguments populate a `ConversionOptions`; `extract_tables` is
/// always enabled and every other option keeps its default.
/// Raises `RuntimeError` when the conversion fails.
#[pyo3(signature = (page, preserve_layout=false, detect_headings=true, include_images=false, image_output_dir=None, embed_images=true, include_form_fields=true))]
fn to_html(
    &mut self,
    page: usize,
    preserve_layout: bool,
    detect_headings: bool,
    include_images: bool,
    image_output_dir: Option<String>,
    embed_images: bool,
    include_form_fields: bool,
) -> PyResult<String> {
    let options = RustConversionOptions {
        extract_tables: true,
        preserve_layout,
        detect_headings,
        include_images,
        image_output_dir,
        embed_images,
        include_form_fields,
        ..Default::default()
    };
    let converted = self.inner.to_html(page, &options);
    converted.map_err(|e| PyRuntimeError::new_err(format!("Failed to convert to HTML: {}", e)))
}
/// Convert every page to Markdown.
///
/// The keyword arguments populate a `ConversionOptions`; `extract_tables` is
/// always enabled and every other option keeps its default.
/// Raises `RuntimeError` when the conversion fails.
#[pyo3(signature = (preserve_layout=false, detect_headings=true, include_images=false, image_output_dir=None, embed_images=true, include_form_fields=true))]
fn to_markdown_all(
    &mut self,
    preserve_layout: bool,
    detect_headings: bool,
    include_images: bool,
    image_output_dir: Option<String>,
    embed_images: bool,
    include_form_fields: bool,
) -> PyResult<String> {
    let options = RustConversionOptions {
        extract_tables: true,
        preserve_layout,
        detect_headings,
        include_images,
        image_output_dir,
        embed_images,
        include_form_fields,
        ..Default::default()
    };
    let converted = self.inner.to_markdown_all(&options);
    converted.map_err(|e| {
        PyRuntimeError::new_err(format!("Failed to convert all pages to Markdown: {}", e))
    })
}
/// Convert every page to HTML.
///
/// The keyword arguments populate a `ConversionOptions`; `extract_tables` is
/// always enabled and every other option keeps its default.
/// Raises `RuntimeError` when the conversion fails.
#[pyo3(signature = (preserve_layout=false, detect_headings=true, include_images=false, image_output_dir=None, embed_images=true, include_form_fields=true))]
fn to_html_all(
    &mut self,
    preserve_layout: bool,
    detect_headings: bool,
    include_images: bool,
    image_output_dir: Option<String>,
    embed_images: bool,
    include_form_fields: bool,
) -> PyResult<String> {
    let options = RustConversionOptions {
        extract_tables: true,
        preserve_layout,
        detect_headings,
        include_images,
        image_output_dir,
        embed_images,
        include_form_fields,
        ..Default::default()
    };
    let converted = self.inner.to_html_all(&options);
    converted.map_err(|e| {
        PyRuntimeError::new_err(format!("Failed to convert all pages to HTML: {}", e))
    })
}
/// Fetch page `index` from the editor, wrapped in a `PyPdfPage`.
///
/// The editor is created on demand. Raises `RuntimeError` when the editor
/// cannot be created or the page cannot be retrieved.
fn page(&mut self, index: usize) -> PyResult<PyPdfPage> {
    self.ensure_editor()?;
    let Some(editor) = self.editor.as_mut() else {
        return Err(PyRuntimeError::new_err(
            "Internal error: editor missing after initialization",
        ));
    };

    match editor.get_page(index) {
        Ok(page) => Ok(PyPdfPage { inner: page }),
        Err(e) => Err(PyRuntimeError::new_err(format!("Failed to get page: {}", e))),
    }
}
/// Hand a clone of `page`'s inner page back to the editor.
///
/// The editor is created on demand. Raises `RuntimeError` when the editor
/// cannot be created or rejects the page.
fn save_page(&mut self, page: &PyPdfPage) -> PyResult<()> {
    self.ensure_editor()?;
    let Some(editor) = self.editor.as_mut() else {
        return Err(PyRuntimeError::new_err(
            "Internal error: editor missing after initialization",
        ));
    };

    let stored = editor.save_page(page.inner.clone());
    stored.map_err(|e| PyRuntimeError::new_err(format!("Failed to save page: {}", e)))
}
fn save(&mut self, path: &str) -> PyResult<()> {
use crate::editor::EditableDocument;
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.save(path)
.map_err(|e| PyIOError::new_err(format!("Failed to save PDF: {}", e)))
} else {
Err(PyRuntimeError::new_err("No editor initialized."))
}
}
#[pyo3(signature = (path, user_password, owner_password=None, allow_print=true, allow_copy=true, allow_modify=true, allow_annotate=true))]
fn save_encrypted(
&mut self,
path: &str,
user_password: &str,
owner_password: Option<&str>,
allow_print: bool,
allow_copy: bool,
allow_modify: bool,
allow_annotate: bool,
) -> PyResult<()> {
use crate::editor::{
EditableDocument, EncryptionAlgorithm, EncryptionConfig, Permissions, SaveOptions,
};
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
let owner_pwd = owner_password.unwrap_or(user_password);
let permissions = Permissions {
print: allow_print,
print_high_quality: allow_print,
modify: allow_modify,
copy: allow_copy,
annotate: allow_annotate,
fill_forms: allow_annotate,
accessibility: true,
assemble: allow_modify,
};
let config = EncryptionConfig::new(user_password, owner_pwd)
.with_algorithm(EncryptionAlgorithm::Aes256)
.with_permissions(permissions);
let options = SaveOptions::with_encryption(config);
editor
.save_with_options(path, options)
.map_err(|e| PyIOError::new_err(format!("Failed to save encrypted PDF: {}", e)))
} else {
Err(PyRuntimeError::new_err("No editor initialized."))
}
}
fn set_title(&mut self, title: &str) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor.set_title(title);
}
Ok(())
}
fn set_author(&mut self, author: &str) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor.set_author(author);
}
Ok(())
}
fn set_subject(&mut self, subject: &str) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor.set_subject(subject);
}
Ok(())
}
fn set_keywords(&mut self, keywords: &str) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor.set_keywords(keywords);
}
Ok(())
}
fn page_rotation(&mut self, page: usize) -> PyResult<i32> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.get_page_rotation(page)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
} else {
Err(PyRuntimeError::new_err("No editor initialized."))
}
}
fn set_page_rotation(&mut self, page: usize, degrees: i32) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.set_page_rotation(page, degrees)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
} else {
Ok(())
}
}
fn rotate_page(&mut self, page: usize, degrees: i32) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.rotate_page_by(page, degrees)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
} else {
Ok(())
}
}
fn rotate_all_pages(&mut self, degrees: i32) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.rotate_all_pages(degrees)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
} else {
Ok(())
}
}
fn page_media_box(&mut self, page: usize) -> PyResult<(f32, f32, f32, f32)> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
let b = editor
.get_page_media_box(page)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
Ok((b[0], b[1], b[2], b[3]))
} else {
Err(PyRuntimeError::new_err("No editor initialized."))
}
}
fn set_page_media_box(
&mut self,
page: usize,
llx: f32,
lly: f32,
urx: f32,
ury: f32,
) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.set_page_media_box(page, [llx, lly, urx, ury])
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
} else {
Ok(())
}
}
fn page_crop_box(&mut self, page: usize) -> PyResult<Option<(f32, f32, f32, f32)>> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
let b = editor
.get_page_crop_box(page)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
Ok(b.map(|v| (v[0], v[1], v[2], v[3])))
} else {
Ok(None)
}
}
fn set_page_crop_box(
&mut self,
page: usize,
llx: f32,
lly: f32,
urx: f32,
ury: f32,
) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.set_page_crop_box(page, [llx, lly, urx, ury])
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
} else {
Ok(())
}
}
fn crop_margins(&mut self, left: f32, right: f32, top: f32, bottom: f32) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.crop_margins(left, right, top, bottom)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
} else {
Ok(())
}
}
fn erase_region(
&mut self,
page: usize,
llx: f32,
lly: f32,
urx: f32,
ury: f32,
) -> PyResult<()> {
let rect = crate::geometry::Rect::new(llx, lly, urx - llx, ury - lly);
self.inner
.erase_region(page, rect)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.erase_region(page, [llx, lly, urx, ury])
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
}
Ok(())
}
fn erase_regions(&mut self, page: usize, rects: Vec<(f32, f32, f32, f32)>) -> PyResult<()> {
for (llx, lly, urx, ury) in &rects {
let rect = crate::geometry::Rect::new(*llx, *lly, *urx - *llx, *ury - *lly);
self.inner
.erase_region(page, rect)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
}
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
let arrays: Vec<[f32; 4]> = rects.iter().map(|r| [r.0, r.1, r.2, r.3]).collect();
editor
.erase_regions(page, &arrays)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
}
Ok(())
}
fn clear_erase_regions(&mut self, page: usize) -> PyResult<()> {
self.inner
.clear_erase_regions(page)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
if let Some(ref mut editor) = self.editor {
editor.clear_erase_regions(page);
}
Ok(())
}
fn flatten_page_annotations(&mut self, page: usize) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.flatten_page_annotations(page)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
}
Ok(())
}
fn flatten_all_annotations(&mut self) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.flatten_all_annotations()
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
}
Ok(())
}
/// Report whether the editor has `page` marked for flattening.
///
/// Returns `False` when no editor has been created yet.
fn is_page_marked_for_flatten(&self, page: usize) -> bool {
    match self.editor.as_ref() {
        Some(editor) => editor.is_page_marked_for_flatten(page),
        None => false,
    }
}
fn unmark_page_for_flatten(&mut self, page: usize) {
if let Some(ref mut editor) = self.editor {
editor.unmark_page_for_flatten(page);
}
}
fn apply_page_redactions(&mut self, page: usize) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.apply_page_redactions(page)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
}
Ok(())
}
fn apply_all_redactions(&mut self) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.apply_all_redactions()
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
}
Ok(())
}
/// Report whether the editor has `page` marked for redaction.
///
/// Returns `False` when no editor has been created yet.
fn is_page_marked_for_redaction(&self, page: usize) -> bool {
    match self.editor.as_ref() {
        Some(editor) => editor.is_page_marked_for_redaction(page),
        None => false,
    }
}
fn unmark_page_for_redaction(&mut self, page: usize) {
if let Some(ref mut editor) = self.editor {
editor.unmark_page_for_redaction(page);
}
}
fn page_images(&mut self, page: usize, py: Python<'_>) -> PyResult<Py<PyAny>> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
let images = editor
.get_page_images(page)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
let list = pyo3::types::PyList::empty(py);
for img in images {
let dict = pyo3::types::PyDict::new(py);
dict.set_item("name", &img.name)?;
dict.set_item("x", img.bounds[0])?;
dict.set_item("y", img.bounds[1])?;
dict.set_item("width", img.bounds[2])?;
dict.set_item("height", img.bounds[3])?;
dict.set_item(
"matrix",
(
img.matrix[0],
img.matrix[1],
img.matrix[2],
img.matrix[3],
img.matrix[4],
img.matrix[5],
),
)?;
list.append(dict)?;
}
Ok(list.into())
} else {
Err(PyRuntimeError::new_err("No editor initialized."))
}
}
fn reposition_image(&mut self, page: usize, image_name: &str, x: f32, y: f32) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.reposition_image(page, image_name, x, y)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
}
Ok(())
}
fn resize_image(
&mut self,
page: usize,
image_name: &str,
width: f32,
height: f32,
) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.resize_image(page, image_name, width, height)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
}
Ok(())
}
fn set_image_bounds(
&mut self,
page: usize,
image_name: &str,
x: f32,
y: f32,
width: f32,
height: f32,
) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.set_image_bounds(page, image_name, x, y, width, height)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
}
Ok(())
}
fn clear_image_modifications(&mut self, page: usize) {
if let Some(ref mut editor) = self.editor {
editor.clear_image_modifications(page);
}
}
/// Report whether the editor has image modifications recorded for `page`.
///
/// Returns `False` when no editor has been created yet.
fn has_image_modifications(&self, page: usize) -> bool {
    match self.editor.as_ref() {
        Some(editor) => editor.has_image_modifications(page),
        None => false,
    }
}
/// Search the whole document for `pattern`.
///
/// The flags and `max_results` are forwarded to `SearchOptions` unchanged.
/// Returns a Python list of dicts with the keys `page`, `text`, `x`, `y`,
/// `width` and `height`. Raises `RuntimeError` when the search fails.
#[pyo3(signature = (pattern, case_insensitive=false, literal=false, whole_word=false, max_results=0))]
fn search(
    &mut self,
    py: Python<'_>,
    pattern: &str,
    case_insensitive: bool,
    literal: bool,
    whole_word: bool,
    max_results: usize,
) -> PyResult<Py<PyAny>> {
    use crate::search::{SearchOptions, TextSearcher};

    let options = SearchOptions::new()
        .with_case_insensitive(case_insensitive)
        .with_literal(literal)
        .with_whole_word(whole_word)
        .with_max_results(max_results);
    let hits = TextSearcher::search(&mut self.inner, pattern, &options)
        .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;

    let out = pyo3::types::PyList::empty(py);
    for hit in hits {
        let entry = pyo3::types::PyDict::new(py);
        entry.set_item("page", hit.page)?;
        entry.set_item("text", &hit.text)?;
        entry.set_item("x", hit.bbox.x)?;
        entry.set_item("y", hit.bbox.y)?;
        entry.set_item("width", hit.bbox.width)?;
        entry.set_item("height", hit.bbox.height)?;
        out.append(entry)?;
    }
    Ok(out.into())
}
/// Search a single page for `pattern`.
///
/// Same as `search`, except the options' page range is set to `(page, page)`.
/// Returns a Python list of dicts with the keys `page`, `text`, `x`, `y`,
/// `width` and `height`. Raises `RuntimeError` when the search fails.
#[pyo3(signature = (page, pattern, case_insensitive=false, literal=false, whole_word=false, max_results=0))]
fn search_page(
    &mut self,
    py: Python<'_>,
    page: usize,
    pattern: &str,
    case_insensitive: bool,
    literal: bool,
    whole_word: bool,
    max_results: usize,
) -> PyResult<Py<PyAny>> {
    use crate::search::{SearchOptions, TextSearcher};

    let options = SearchOptions::new()
        .with_case_insensitive(case_insensitive)
        .with_literal(literal)
        .with_whole_word(whole_word)
        .with_max_results(max_results)
        .with_page_range(page, page);
    let hits = TextSearcher::search(&mut self.inner, pattern, &options)
        .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;

    let out = pyo3::types::PyList::empty(py);
    for hit in hits {
        let entry = pyo3::types::PyDict::new(py);
        entry.set_item("page", hit.page)?;
        entry.set_item("text", &hit.text)?;
        entry.set_item("x", hit.bbox.x)?;
        entry.set_item("y", hit.bbox.y)?;
        entry.set_item("width", hit.bbox.width)?;
        entry.set_item("height", hit.bbox.height)?;
        out.append(entry)?;
    }
    Ok(out.into())
}
/// Describe the images on `page`, optionally restricted to `region`.
///
/// Returns a Python list of dicts with the keys `width`, `height`,
/// `color_space` (the `Debug` rendering), `bits_per_component`, `bbox`
/// (a 4-tuple or `None`), `rotation` and `matrix`.
/// Raises `RuntimeError` when extraction fails.
#[pyo3(signature = (page, region=None))]
fn extract_images(
    &mut self,
    py: Python<'_>,
    page: usize,
    region: Option<(f32, f32, f32, f32)>,
) -> PyResult<Py<PyAny>> {
    let extracted = match region {
        Some(r) => self
            .inner
            .extract_images_in_rect(page, crate::geometry::Rect::new(r.0, r.1, r.2, r.3)),
        None => self.inner.extract_images(page),
    };
    let images = extracted.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;

    let out = pyo3::types::PyList::empty(py);
    for img in &images {
        let entry = pyo3::types::PyDict::new(py);
        entry.set_item("width", img.width())?;
        entry.set_item("height", img.height())?;
        entry.set_item("color_space", format!("{:?}", img.color_space()))?;
        entry.set_item("bits_per_component", img.bits_per_component())?;
        match img.bbox() {
            Some(b) => entry.set_item("bbox", (b.x, b.y, b.width, b.height))?,
            None => entry.set_item("bbox", py.None())?,
        }
        entry.set_item("rotation", img.rotation_degrees())?;
        entry.set_item("matrix", img.matrix())?;
        out.append(entry)?;
    }
    Ok(out.into())
}
/// Extract the tables on `page`, optionally restricted to `region`.
///
/// `table_settings` is converted by `table_settings_to_config`. Returns a Python
/// list of table dicts with the keys `col_count`, `row_count`, `bbox` (a 4-tuple
/// or `None`), `has_header` and `rows`. Each row dict has `is_header` and
/// `cells`; each cell dict has `text` and, only when the cell has a bounding
/// box, `bbox`. Raises `RuntimeError` when extraction fails.
#[pyo3(signature = (page, region=None, table_settings=None))]
fn extract_tables(
    &mut self,
    py: Python<'_>,
    page: usize,
    region: Option<(f32, f32, f32, f32)>,
    table_settings: Option<Bound<'_, pyo3::types::PyDict>>,
) -> PyResult<Py<PyAny>> {
    let config = table_settings_to_config(table_settings)?;
    let extracted = match region {
        Some(r) => self.inner.extract_tables_in_rect_with_config(
            page,
            crate::geometry::Rect::new(r.0, r.1, r.2, r.3),
            config,
        ),
        None => self.inner.extract_tables_with_config(page, config),
    };
    let tables = extracted.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;

    let out = pyo3::types::PyList::empty(py);
    for table in &tables {
        let table_dict = pyo3::types::PyDict::new(py);
        table_dict.set_item("col_count", table.col_count)?;
        table_dict.set_item("row_count", table.rows.len())?;
        match table.bbox {
            Some(b) => table_dict.set_item("bbox", (b.x, b.y, b.width, b.height))?,
            None => table_dict.set_item("bbox", py.None())?,
        }
        table_dict.set_item("has_header", table.has_header)?;

        let row_list = pyo3::types::PyList::empty(py);
        for row in &table.rows {
            let row_dict = pyo3::types::PyDict::new(py);
            row_dict.set_item("is_header", row.is_header)?;

            let cell_list = pyo3::types::PyList::empty(py);
            for cell in &row.cells {
                let cell_dict = pyo3::types::PyDict::new(py);
                cell_dict.set_item("text", &cell.text)?;
                // Unlike the table-level entry, a missing cell bbox leaves the key out.
                if let Some(b) = cell.bbox {
                    cell_dict.set_item("bbox", (b.x, b.y, b.width, b.height))?;
                }
                cell_list.append(cell_dict)?;
            }
            row_dict.set_item("cells", cell_list)?;
            row_list.append(row_dict)?;
        }
        table_dict.set_item("rows", row_list)?;
        out.append(table_dict)?;
    }
    Ok(out.into())
}
/// Extract the text spans of `page`, wrapped in `PyTextSpan`.
///
/// `reading_order` accepts `"top_to_bottom"` (also the default when omitted) or
/// `"column_aware"`; any other value raises `RuntimeError`. When `region` is
/// given, spans are taken from that rectangle with
/// `RectFilterMode::Intersects`, and the parsed reading order is not used.
/// Raises `RuntimeError` when extraction fails.
#[pyo3(signature = (page, region=None, reading_order=None))]
fn extract_spans(
    &mut self,
    page: usize,
    region: Option<(f32, f32, f32, f32)>,
    reading_order: Option<&str>,
) -> PyResult<Vec<PyTextSpan>> {
    // The value is validated even when `region` makes it irrelevant.
    let order = match reading_order.unwrap_or("top_to_bottom") {
        "column_aware" => crate::document::ReadingOrder::ColumnAware,
        "top_to_bottom" => crate::document::ReadingOrder::TopToBottom,
        other => {
            return Err(PyRuntimeError::new_err(format!(
                "Unknown reading_order '{}'. Expected 'top_to_bottom' or 'column_aware'.",
                other
            )));
        },
    };

    let extracted = match region {
        Some(r) => self.inner.extract_spans_in_rect(
            page,
            crate::geometry::Rect::new(r.0, r.1, r.2, r.3),
            crate::layout::RectFilterMode::Intersects,
        ),
        None => self.inner.extract_spans_with_reading_order(page, order),
    };

    let spans = extracted.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
    Ok(spans
        .into_iter()
        .map(|span| PyTextSpan { inner: span })
        .collect())
}
/// Extract spans, characters and page dimensions for `page` in one call.
///
/// `reading_order` accepts `"top_to_bottom"` (also the default when omitted) or
/// `"column_aware"`; any other value raises `RuntimeError`. Returns a dict with
/// the keys `spans`, `chars`, `page_width` and `page_height`.
/// Raises `RuntimeError` when extraction fails.
#[pyo3(signature = (page, reading_order=None))]
fn extract_page_text(
    &mut self,
    py: Python<'_>,
    page: usize,
    reading_order: Option<&str>,
) -> PyResult<Py<PyAny>> {
    let order = match reading_order.unwrap_or("top_to_bottom") {
        "column_aware" => crate::document::ReadingOrder::ColumnAware,
        "top_to_bottom" => crate::document::ReadingOrder::TopToBottom,
        other => {
            return Err(PyRuntimeError::new_err(format!(
                "Unknown reading_order '{}'. Expected 'top_to_bottom' or 'column_aware'.",
                other
            )));
        },
    };

    let page_text = self
        .inner
        .extract_page_text_with_options(page, order)
        .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;

    let result = pyo3::types::PyDict::new(py);

    let wrapped_spans: Vec<PyTextSpan> = page_text
        .spans
        .into_iter()
        .map(|span| PyTextSpan { inner: span })
        .collect();
    result.set_item("spans", wrapped_spans)?;

    let wrapped_chars: Vec<PyTextChar> = page_text
        .chars
        .into_iter()
        .map(|ch| PyTextChar { inner: ch })
        .collect();
    result.set_item("chars", wrapped_chars)?;

    result.set_item("page_width", page_text.page_width)?;
    result.set_item("page_height", page_text.page_height)?;
    Ok(result.into())
}
/// Return the document outline converted by `outline_items_to_py`, or `None`
/// when the document reports no outline.
///
/// Raises `RuntimeError` when the outline cannot be read.
fn get_outline(&mut self, py: Python<'_>) -> PyResult<Option<Py<PyAny>>> {
    let outline = self
        .inner
        .get_outline()
        .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;

    let Some(items) = outline else {
        return Ok(None);
    };
    let converted = outline_items_to_py(py, &items)?;
    Ok(Some(converted))
}
/// Collect the annotations of `page` as a list of dicts.
///
/// Only attributes that are present on an annotation are written to its dict.
/// `color` is emitted as a 3-tuple when at least three components exist, and
/// `action_uri` only for URI link actions. Extraction failures raise
/// `RuntimeError`.
fn get_annotations(&mut self, py: Python<'_>, page: usize) -> PyResult<Py<PyAny>> {
    let annotations = self
        .inner
        .get_annotations(page)
        .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
    let out = pyo3::types::PyList::empty(py);
    for ann in &annotations {
        let entry = pyo3::types::PyDict::new(py);
        if let Some(ref subtype) = ann.subtype {
            entry.set_item("subtype", subtype)?;
        }
        if let Some(ref contents) = ann.contents {
            entry.set_item("contents", contents)?;
        }
        if let Some(rect) = ann.rect {
            entry.set_item("rect", (rect[0], rect[1], rect[2], rect[3]))?;
        }
        if let Some(ref author) = ann.author {
            entry.set_item("author", author)?;
        }
        if let Some(ref created) = ann.creation_date {
            entry.set_item("creation_date", created)?;
        }
        if let Some(ref modified) = ann.modification_date {
            entry.set_item("modification_date", modified)?;
        }
        if let Some(ref subject) = ann.subject {
            entry.set_item("subject", subject)?;
        }
        // Colors with fewer than three components are skipped entirely.
        match ann.color {
            Some(ref rgb) if rgb.len() >= 3 => {
                entry.set_item("color", (rgb[0], rgb[1], rgb[2]))?;
            },
            _ => {},
        }
        if let Some(opacity) = ann.opacity {
            entry.set_item("opacity", opacity)?;
        }
        if let Some(ref kind) = ann.field_type {
            // The field type is exposed through its `Debug` representation.
            entry.set_item("field_type", format!("{:?}", kind))?;
        }
        if let Some(ref field_name) = ann.field_name {
            entry.set_item("field_name", field_name)?;
        }
        if let Some(ref field_value) = ann.field_value {
            entry.set_item("field_value", field_value)?;
        }
        if let Some(crate::annotations::LinkAction::Uri(ref uri)) = ann.action {
            entry.set_item("action_uri", uri)?;
        }
        out.append(entry)?;
    }
    Ok(out.into())
}
/// List the paths of `page`, optionally limited to `region`.
///
/// The four values of `region` are forwarded, in order, to `Rect::new`. Each
/// path is converted with `path_to_py_dict`; extraction failures raise
/// `RuntimeError`.
#[pyo3(signature = (page, region=None))]
fn extract_paths(
    &mut self,
    py: Python<'_>,
    page: usize,
    region: Option<(f32, f32, f32, f32)>,
) -> PyResult<Py<PyAny>> {
    let extracted = match region {
        Some((a, b, c, d)) => self
            .inner
            .extract_paths_in_rect(page, crate::geometry::Rect::new(a, b, c, d)),
        None => self.inner.extract_paths(page),
    };
    let shapes = extracted.map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
    let out = pyo3::types::PyList::empty(py);
    for shape in &shapes {
        let as_dict = path_to_py_dict(py, shape)?;
        out.append(as_dict)?;
    }
    Ok(out.into())
}
/// List the results of the document's `extract_rects` for `page`, optionally
/// limited to `region`.
///
/// The four values of `region` are forwarded, in order, to `Rect::new`. Each
/// item is converted with `path_to_py_dict`; extraction failures raise
/// `RuntimeError`.
#[pyo3(signature = (page, region=None))]
fn extract_rects(
    &mut self,
    py: Python<'_>,
    page: usize,
    region: Option<(f32, f32, f32, f32)>,
) -> PyResult<Py<PyAny>> {
    let extracted = match region {
        Some((a, b, c, d)) => self
            .inner
            .extract_rects_in_rect(page, crate::geometry::Rect::new(a, b, c, d)),
        None => self.inner.extract_rects(page),
    };
    let shapes = extracted.map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
    let out = pyo3::types::PyList::empty(py);
    for shape in &shapes {
        let as_dict = path_to_py_dict(py, shape)?;
        out.append(as_dict)?;
    }
    Ok(out.into())
}
/// List the results of the document's `extract_lines` for `page`, optionally
/// limited to `region`.
///
/// The four values of `region` are forwarded, in order, to `Rect::new`. Each
/// item is converted with `path_to_py_dict`; extraction failures raise
/// `RuntimeError`.
#[pyo3(signature = (page, region=None))]
fn extract_lines(
    &mut self,
    py: Python<'_>,
    page: usize,
    region: Option<(f32, f32, f32, f32)>,
) -> PyResult<Py<PyAny>> {
    let extracted = match region {
        Some((a, b, c, d)) => self
            .inner
            .extract_lines_in_rect(page, crate::geometry::Rect::new(a, b, c, d)),
        None => self.inner.extract_lines(page),
    };
    let shapes = extracted.map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
    let out = pyo3::types::PyList::empty(py);
    for shape in &shapes {
        let as_dict = path_to_py_dict(py, shape)?;
        out.append(as_dict)?;
    }
    Ok(out.into())
}
/// Extract the text of `page` through the document's OCR-backed extraction.
///
/// `engine` is optional; when supplied it is extracted as a `PyOcrEngine`
/// reference and its inner engine is passed along. Default
/// `OcrExtractOptions` are always used.
///
/// Raises `RuntimeError` when extraction fails, or unconditionally when the
/// crate was built without the `ocr` feature.
#[pyo3(signature = (page, engine=None))]
fn extract_text_ocr(
&mut self,
_py: Python<'_>,
page: usize,
engine: Option<Bound<'_, PyAny>>,
) -> PyResult<String> {
#[cfg(feature = "ocr")]
{
// Keep the `PyRef` alive in `ocr_engine` so the borrowed inner engine stays valid.
let ocr_engine = if let Some(eng) = engine {
Some(eng.extract::<PyRef<PyOcrEngine>>()?)
} else {
None
};
let engine_inner = ocr_engine.as_ref().map(|e| &e.inner);
let options = crate::ocr::OcrExtractOptions::default();
self.inner
.extract_text_with_ocr(page, engine_inner, options)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
}
#[cfg(not(feature = "ocr"))]
{
// Touch the parameters so they do not count as unused in builds without OCR.
let _ = (engine, page);
Err(PyRuntimeError::new_err("OCR feature not enabled."))
}
}
/// Return every form field found by `FormExtractor::extract_fields`, wrapped
/// as `FormField` objects. Extraction failures raise `RuntimeError`.
fn get_form_fields(&mut self) -> PyResult<Vec<PyFormField>> {
    use crate::extractors::forms::FormExtractor;
    match FormExtractor::extract_fields(&mut self.inner) {
        Ok(found) => Ok(found
            .into_iter()
            .map(|field| PyFormField { inner: field })
            .collect()),
        Err(err) => Err(PyRuntimeError::new_err(err.to_string())),
    }
}
/// Look up the current value of the form field `name` through the editor.
///
/// The editor is opened on first use. Returns `None` when the editor reports
/// no value; editor errors raise `RuntimeError`.
fn get_form_field_value(&mut self, name: &str, py: Python<'_>) -> PyResult<Py<PyAny>> {
    self.ensure_editor()?;
    let Some(editor) = self.editor.as_mut() else {
        return Err(PyRuntimeError::new_err(
            "Internal error: editor missing after initialization",
        ));
    };
    let lookup = editor
        .get_form_field_value(name)
        .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
    if let Some(found) = lookup {
        form_field_value_to_python(&found, py)
    } else {
        Ok(py.None())
    }
}
/// Set the form field `name` to `value` through the editor.
///
/// The editor is opened on first use. `value` is converted with
/// `python_to_form_field_value`; editor errors raise `RuntimeError`.
fn set_form_field_value(&mut self, name: &str, value: &Bound<'_, PyAny>) -> PyResult<()> {
    self.ensure_editor()?;
    let Some(editor) = self.editor.as_mut() else {
        return Err(PyRuntimeError::new_err(
            "Internal error: editor missing after initialization",
        ));
    };
    let converted = python_to_form_field_value(value)?;
    match editor.set_form_field_value(name, converted) {
        Ok(done) => Ok(done),
        Err(err) => Err(PyRuntimeError::new_err(err.to_string())),
    }
}
/// Report the result of `XfaExtractor::has_xfa` for this document; failures
/// raise `RuntimeError`.
fn has_xfa(&mut self) -> PyResult<bool> {
    use crate::xfa::XfaExtractor;
    match XfaExtractor::has_xfa(&mut self.inner) {
        Ok(present) => Ok(present),
        Err(err) => Err(PyRuntimeError::new_err(err.to_string())),
    }
}
/// Export the document's form data to `path`.
///
/// `format` selects the exporter: `"fdf"` (default) or `"xfdf"`. Any other
/// value, and any editor failure, raises `RuntimeError`. The editor is opened
/// on first use.
#[pyo3(signature = (path, format="fdf"))]
fn export_form_data(&mut self, path: &str, format: &str) -> PyResult<()> {
    self.ensure_editor()?;
    let Some(editor) = self.editor.as_mut() else {
        return Err(PyRuntimeError::new_err(
            "Internal error: editor missing after initialization",
        ));
    };
    if format == "fdf" {
        editor
            .export_form_data_fdf(path)
            .map_err(|err| PyRuntimeError::new_err(err.to_string()))
    } else if format == "xfdf" {
        editor
            .export_form_data_xfdf(path)
            .map_err(|err| PyRuntimeError::new_err(err.to_string()))
    } else {
        Err(PyRuntimeError::new_err("Unknown format."))
    }
}
/// Return the images of `page` as a list of dicts holding PNG bytes.
///
/// Every dict carries `width`, `height`, `format` (always `"png"`) and `data`
/// (a `bytes` object). Extraction or PNG conversion failures raise
/// `RuntimeError`.
fn extract_image_bytes(&mut self, py: Python<'_>, page: usize) -> PyResult<Py<PyAny>> {
    let page_images = self
        .inner
        .extract_images(page)
        .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
    let out = pyo3::types::PyList::empty(py);
    for image in &page_images {
        let encoded = image
            .to_png_bytes()
            .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
        let entry = pyo3::types::PyDict::new(py);
        entry.set_item("width", image.width())?;
        entry.set_item("height", image.height())?;
        entry.set_item("format", "png")?;
        entry.set_item("data", pyo3::types::PyBytes::new(py, &encoded))?;
        out.append(entry)?;
    }
    Ok(out.into())
}
fn flatten_forms(&mut self) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.flatten_forms()
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
}
Ok(())
}
fn flatten_forms_on_page(&mut self, page: usize) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.flatten_forms_on_page(page)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
}
Ok(())
}
/// Merge another PDF into this document and return the editor's `usize` result.
///
/// `source` is first tried as a string (passed to the editor's `merge_from`)
/// and then as a byte sequence (passed to `merge_from_bytes`). Anything else,
/// and any editor failure, raises `RuntimeError`. The editor is opened on
/// first use.
fn merge_from(&mut self, source: &Bound<'_, PyAny>) -> PyResult<usize> {
    self.ensure_editor()?;
    let Some(editor) = self.editor.as_mut() else {
        return Err(PyRuntimeError::new_err(
            "Internal error: editor missing after initialization",
        ));
    };
    // The string interpretation wins over the byte interpretation.
    if let Ok(file_path) = source.extract::<String>() {
        return editor
            .merge_from(&file_path)
            .map_err(|err| PyRuntimeError::new_err(err.to_string()));
    }
    if let Ok(raw) = source.extract::<Vec<u8>>() {
        return editor
            .merge_from_bytes(&raw)
            .map_err(|err| PyRuntimeError::new_err(err.to_string()));
    }
    Err(PyRuntimeError::new_err("Invalid source."))
}
fn embed_file(&mut self, name: &str, data: &Bound<'_, PyBytes>) -> PyResult<()> {
self.ensure_editor()?;
if let Some(ref mut editor) = self.editor {
editor
.embed_file(name, data.as_bytes().to_vec())
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
}
Ok(())
}
/// Return the entries produced by `PageLabelExtractor::extract` as a list of dicts.
///
/// Each dict has `start_page`, `style` (the `Debug` representation of the
/// style), `prefix` (`None` when absent) and `start_value`. Extraction
/// failures raise `RuntimeError`.
fn page_labels(&mut self, py: Python<'_>) -> PyResult<Py<PyAny>> {
    use crate::extractors::page_labels::PageLabelExtractor;
    let ranges = PageLabelExtractor::extract(&mut self.inner)
        .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
    let out = pyo3::types::PyList::empty(py);
    for range in &ranges {
        let entry = pyo3::types::PyDict::new(py);
        entry.set_item("start_page", range.start_page)?;
        entry.set_item("style", format!("{:?}", range.style))?;
        // The `prefix` key is always present; a missing prefix maps to `None`.
        match range.prefix {
            Some(ref prefix) => entry.set_item("prefix", prefix)?,
            None => entry.set_item("prefix", py.None())?,
        }
        entry.set_item("start_value", range.start_value)?;
        out.append(entry)?;
    }
    Ok(out.into())
}
/// Return the result of `XmpExtractor::extract` as a dict, or `None` when the
/// extractor finds nothing.
///
/// Optional values are included only when set and list values only when
/// non-empty. Extraction failures raise `RuntimeError`.
fn xmp_metadata(&mut self, py: Python<'_>) -> PyResult<Py<PyAny>> {
    use crate::extractors::xmp::XmpExtractor;
    let extracted = XmpExtractor::extract(&mut self.inner)
        .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
    let Some(xmp) = extracted else {
        return Ok(py.None());
    };
    let out = pyo3::types::PyDict::new(py);
    if let Some(ref title) = xmp.dc_title {
        out.set_item("dc_title", title)?;
    }
    if !xmp.dc_creator.is_empty() {
        out.set_item("dc_creator", &xmp.dc_creator)?;
    }
    if let Some(ref description) = xmp.dc_description {
        out.set_item("dc_description", description)?;
    }
    if !xmp.dc_subject.is_empty() {
        out.set_item("dc_subject", &xmp.dc_subject)?;
    }
    if let Some(ref language) = xmp.dc_language {
        out.set_item("dc_language", language)?;
    }
    if let Some(ref tool) = xmp.xmp_creator_tool {
        out.set_item("xmp_creator_tool", tool)?;
    }
    if let Some(ref created) = xmp.xmp_create_date {
        out.set_item("xmp_create_date", created)?;
    }
    if let Some(ref modified) = xmp.xmp_modify_date {
        out.set_item("xmp_modify_date", modified)?;
    }
    if let Some(ref producer) = xmp.pdf_producer {
        out.set_item("pdf_producer", producer)?;
    }
    if let Some(ref keywords) = xmp.pdf_keywords {
        out.set_item("pdf_keywords", keywords)?;
    }
    Ok(out.into())
}
/// Validate the document against a PDF/A conformance level.
///
/// `level` is one of `1a`, `1b` (default), `2a`, `2b`, `2u`, `3a`, `3b`, `3u`;
/// anything else raises `ValueError`. Validator failures raise `RuntimeError`.
/// The returned dict holds `valid` (true when no errors were reported),
/// `level`, `errors` and `warnings` (lists of strings).
#[pyo3(signature = (level="1b"))]
fn validate_pdf_a(&mut self, py: Python<'_>, level: &str) -> PyResult<Py<PyAny>> {
    use crate::compliance::pdf_a::validate_pdf_a;
    use crate::compliance::types::PdfALevel;
    let target = match level {
        "1a" => PdfALevel::A1a,
        "1b" => PdfALevel::A1b,
        "2a" => PdfALevel::A2a,
        "2b" => PdfALevel::A2b,
        "2u" => PdfALevel::A2u,
        "3a" => PdfALevel::A3a,
        "3b" => PdfALevel::A3b,
        "3u" => PdfALevel::A3u,
        unknown => {
            return Err(PyValueError::new_err(format!(
                "Unknown PDF/A level: '{}'. Use 1a, 1b, 2a, 2b, 2u, 3a, 3b, 3u",
                unknown
            )));
        },
    };
    let report = validate_pdf_a(&mut self.inner, target)
        .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
    let error_messages: Vec<String> = report
        .errors
        .iter()
        .map(|issue| issue.to_string())
        .collect();
    let warning_messages: Vec<String> = report
        .warnings
        .iter()
        .map(|issue| issue.to_string())
        .collect();
    let out = pyo3::types::PyDict::new(py);
    out.set_item("valid", report.errors.is_empty())?;
    out.set_item("level", level)?;
    out.set_item("errors", error_messages)?;
    out.set_item("warnings", warning_messages)?;
    Ok(out.into())
}
/// Validate the document with the PDF/UA validator at level `Ua1`.
///
/// Validator failures raise `RuntimeError`. The returned dict holds `valid`
/// (true when no errors were reported), `errors` and `warnings` (lists of
/// strings).
fn validate_pdf_ua(&mut self, py: Python<'_>) -> PyResult<Py<PyAny>> {
    use crate::compliance::pdf_ua::validate_pdf_ua;
    let report = validate_pdf_ua(&mut self.inner, crate::compliance::pdf_ua::PdfUaLevel::Ua1)
        .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
    let error_messages: Vec<String> = report
        .errors
        .iter()
        .map(|issue| issue.to_string())
        .collect();
    let warning_messages: Vec<String> = report
        .warnings
        .iter()
        .map(|issue| issue.to_string())
        .collect();
    let out = pyo3::types::PyDict::new(py);
    out.set_item("valid", report.errors.is_empty())?;
    out.set_item("errors", error_messages)?;
    out.set_item("warnings", warning_messages)?;
    Ok(out.into())
}
/// Validate the document against a PDF/X level.
///
/// `level` is one of `1a_2001` (default), `3_2002` or `4`; anything else
/// raises `ValueError`. Validator failures raise `RuntimeError`. The returned
/// dict holds `valid` (true when no errors were reported), `level`, `errors`
/// and `warnings` (lists of strings).
#[pyo3(signature = (level="1a_2001"))]
fn validate_pdf_x(&mut self, py: Python<'_>, level: &str) -> PyResult<Py<PyAny>> {
    use crate::compliance::pdf_x::types::PdfXLevel;
    use crate::compliance::pdf_x::validator::validate_pdf_x;
    let target = match level {
        "1a_2001" => PdfXLevel::X1a2001,
        "3_2002" => PdfXLevel::X32002,
        "4" => PdfXLevel::X4,
        unknown => {
            return Err(PyValueError::new_err(format!(
                "Unknown PDF/X level: '{}'. Use 1a_2001, 3_2002, 4",
                unknown
            )));
        },
    };
    let report = validate_pdf_x(&mut self.inner, target)
        .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
    let error_messages: Vec<String> = report
        .errors
        .iter()
        .map(|issue| issue.to_string())
        .collect();
    let warning_messages: Vec<String> = report
        .warnings
        .iter()
        .map(|issue| issue.to_string())
        .collect();
    let out = pyo3::types::PyDict::new(py);
    out.set_item("valid", report.errors.is_empty())?;
    out.set_item("level", level)?;
    out.set_item("errors", error_messages)?;
    out.set_item("warnings", warning_messages)?;
    Ok(out.into())
}
/// Forward `pages` and `output` to the editor's `extract_pages`.
///
/// The editor is opened on first use; editor failures raise `RuntimeError`.
fn extract_pages(&mut self, pages: Vec<usize>, output: &str) -> PyResult<()> {
    self.ensure_editor()?;
    let Some(editor) = self.editor.as_mut() else {
        return Err(PyRuntimeError::new_err(
            "Internal error: editor missing after initialization",
        ));
    };
    match editor.extract_pages(&pages, output) {
        Ok(done) => Ok(done),
        Err(err) => Err(PyRuntimeError::new_err(err.to_string())),
    }
}
/// Remove the page at `index` through the editor's `remove_page`.
///
/// The editor is opened on first use; editor failures raise `RuntimeError`.
fn delete_page(&mut self, index: usize) -> PyResult<()> {
    use crate::editor::EditableDocument;
    self.ensure_editor()?;
    let Some(editor) = self.editor.as_mut() else {
        return Err(PyRuntimeError::new_err(
            "Internal error: editor missing after initialization",
        ));
    };
    match editor.remove_page(index) {
        Ok(done) => Ok(done),
        Err(err) => Err(PyRuntimeError::new_err(err.to_string())),
    }
}
/// Move a page from `from_index` to `to_index` through the editor's `move_page`.
///
/// The editor is opened on first use; editor failures raise `RuntimeError`.
fn move_page(&mut self, from_index: usize, to_index: usize) -> PyResult<()> {
    use crate::editor::EditableDocument;
    self.ensure_editor()?;
    let Some(editor) = self.editor.as_mut() else {
        return Err(PyRuntimeError::new_err(
            "Internal error: editor missing after initialization",
        ));
    };
    match editor.move_page(from_index, to_index) {
        Ok(done) => Ok(done),
        Err(err) => Err(PyRuntimeError::new_err(err.to_string())),
    }
}
/// Return the bytes produced by `crate::rendering::flatten_to_images` at `dpi`.
///
/// `dpi` defaults to 150. Raises `RuntimeError` when rendering fails, or
/// unconditionally when the crate was built without the `rendering` feature.
#[pyo3(signature = (dpi=150))]
fn flatten_to_images(&mut self, py: Python<'_>, dpi: u32) -> PyResult<Py<PyBytes>> {
    #[cfg(feature = "rendering")]
    {
        let bytes = crate::rendering::flatten_to_images(&mut self.inner, dpi)
            .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
        Ok(PyBytes::new(py, &bytes).unbind())
    }
    #[cfg(not(feature = "rendering"))]
    {
        // Touch the parameters so builds without `rendering` do not emit
        // unused-variable warnings (same approach as `extract_text_ocr`).
        let _ = (py, dpi);
        Err(PyRuntimeError::new_err("Rendering feature not enabled"))
    }
}
/// Python `len(doc)`: delegates to `page_count`, propagating its error.
fn __len__(&mut self) -> PyResult<usize> {
self.page_count()
}
/// Python `doc[index]`: return a `Page` handle for the given page.
///
/// Negative indices count from the end. An index outside the page range
/// raises `IndexError`.
fn __getitem__(slf: Py<Self>, py: Python<'_>, index: isize) -> PyResult<PyDocPage> {
    let total = slf.borrow_mut(py).page_count()? as isize;
    // Translate a negative index into its position from the start.
    let resolved = if index >= 0 { index } else { total + index };
    if !(0..total).contains(&resolved) {
        return Err(pyo3::exceptions::PyIndexError::new_err("page index out of range"));
    }
    let page = PyDocPage {
        doc: slf,
        page_index: resolved as usize,
    };
    Ok(page)
}
/// Python `iter(doc)`: create an iterator over the document's pages.
///
/// The page count is read once, when the iterator is created.
fn __iter__(slf: Py<Self>, py: Python<'_>) -> PyResult<PyDocPageIter> {
    let total_pages = slf.borrow_mut(py).page_count()?;
    let iterator = PyDocPageIter {
        doc: slf,
        index: 0,
        count: total_pages,
    };
    Ok(iterator)
}
/// Python `repr(doc)`: shows the two components of the document version.
fn __repr__(&self) -> String {
    let version = self.inner.version();
    format!("PdfDocument(version={}.{})", version.0, version.1)
}
}
/// Iterator over the pages of a `PdfDocument`, exposed to Python as
/// `PdfDocumentIter`.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "PdfDocumentIter")]
pub struct PyDocPageIter {
// Document whose pages are being iterated.
doc: Py<PyPdfDocument>,
// Index of the next page to yield.
index: usize,
// Page count captured when the iterator was created.
count: usize,
}
#[pymethods]
impl PyDocPageIter {
    /// Python `iter(it)`: the iterator is its own iterator.
    fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
        slf
    }

    /// Python `next(it)`: yield a `Page` for the current index and advance, or
    /// return `None` (ending the iteration) once `count` pages were produced.
    fn __next__(&mut self, py: Python<'_>) -> Option<PyDocPage> {
        (self.index < self.count).then(|| {
            let current = self.index;
            self.index += 1;
            PyDocPage {
                doc: self.doc.clone_ref(py),
                page_index: current,
            }
        })
    }
}
/// Handle to a single page of a `PdfDocument`, exposed to Python as `Page`
/// (subclassable). It stores only a reference to the document and a page index.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "Page", subclass)]
pub struct PyDocPage {
// Owning document; borrowed mutably whenever page data is requested.
doc: Py<PyPdfDocument>,
// Zero-based index of the page inside `doc`.
page_index: usize,
}
// Python-facing API of `Page`. Every accessor that needs page data takes a
// mutable borrow of the owning document for the duration of the call and
// forwards to the matching `PdfDocument` method with this page's index.
// NOTE(review): `Py::borrow_mut` is documented by PyO3 to panic when the
// object is already borrowed — confirm no caller can re-enter the document.
#[pymethods]
impl PyDocPage {
/// Index of this page inside its document.
#[getter]
fn index(&self) -> usize {
self.page_index
}
/// Media box of the page as returned by the document's `get_page_media_box`.
/// Failures raise `RuntimeError`.
#[getter]
fn bbox(&self, py: Python<'_>) -> PyResult<(f32, f32, f32, f32)> {
self.doc
.borrow_mut(py)
.inner
.get_page_media_box(self.page_index)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
}
/// Third minus first component of `bbox`.
#[getter]
fn width(&self, py: Python<'_>) -> PyResult<f32> {
self.bbox(py).map(|(llx, _, urx, _)| urx - llx)
}
/// Fourth minus second component of `bbox`.
#[getter]
fn height(&self, py: Python<'_>) -> PyResult<f32> {
self.bbox(py).map(|(_, lly, _, ury)| ury - lly)
}
/// Result of the document's `extract_text` for this page, without a region.
#[getter]
fn text(&self, py: Python<'_>) -> PyResult<String> {
self.doc.borrow_mut(py).extract_text(self.page_index, None)
}
/// Result of the document's `extract_chars` for this page, without a region.
#[getter]
fn chars(&self, py: Python<'_>) -> PyResult<Vec<PyTextChar>> {
self.doc.borrow_mut(py).extract_chars(self.page_index, None)
}
/// Result of the document's `extract_words` for this page; every optional argument is `None`.
#[getter]
fn words(&self, py: Python<'_>) -> PyResult<Vec<PyWord>> {
self.doc
.borrow_mut(py)
.extract_words(self.page_index, None, None, None)
}
/// Result of the document's `extract_text_lines` for this page; every optional argument is `None`.
#[getter]
fn lines(&self, py: Python<'_>) -> PyResult<Vec<PyTextLine>> {
self.doc
.borrow_mut(py)
.extract_text_lines(self.page_index, None, None, None, None)
}
/// Result of the document's `extract_spans` for this page; every optional argument is `None`.
#[getter]
fn spans(&self, py: Python<'_>) -> PyResult<Vec<PyTextSpan>> {
self.doc
.borrow_mut(py)
.extract_spans(self.page_index, None, None)
}
/// Result of the document's `extract_tables` for this page; every optional argument is `None`.
#[getter]
fn tables(&self, py: Python<'_>) -> PyResult<Py<PyAny>> {
self.doc
.borrow_mut(py)
.extract_tables(py, self.page_index, None, None)
}
/// Result of the document's `extract_images` for this page, without a region.
#[getter]
fn images(&self, py: Python<'_>) -> PyResult<Py<PyAny>> {
self.doc
.borrow_mut(py)
.extract_images(py, self.page_index, None)
}
/// Result of the document's `get_annotations` for this page.
#[getter]
fn annotations(&self, py: Python<'_>) -> PyResult<Py<PyAny>> {
self.doc.borrow_mut(py).get_annotations(py, self.page_index)
}
/// Result of the document's `extract_paths` for this page, without a region.
#[getter]
fn paths(&self, py: Python<'_>) -> PyResult<Py<PyAny>> {
self.doc
.borrow_mut(py)
.extract_paths(py, self.page_index, None)
}
/// Forward to the document's `to_markdown` for this page, passing every
/// option through unchanged.
#[pyo3(signature = (preserve_layout=false, detect_headings=true, include_images=false, image_output_dir=None, embed_images=true, include_form_fields=true))]
fn markdown(
&self,
py: Python<'_>,
preserve_layout: bool,
detect_headings: bool,
include_images: bool,
image_output_dir: Option<String>,
embed_images: bool,
include_form_fields: bool,
) -> PyResult<String> {
self.doc.borrow_mut(py).to_markdown(
self.page_index,
preserve_layout,
detect_headings,
include_images,
image_output_dir,
embed_images,
include_form_fields,
)
}
/// Forward to the document's `to_plain_text` for this page, passing every
/// option through unchanged.
#[pyo3(signature = (preserve_layout=false, detect_headings=true, include_images=false, image_output_dir=None))]
fn plain_text(
&self,
py: Python<'_>,
preserve_layout: bool,
detect_headings: bool,
include_images: bool,
image_output_dir: Option<String>,
) -> PyResult<String> {
self.doc.borrow_mut(py).to_plain_text(
self.page_index,
preserve_layout,
detect_headings,
include_images,
image_output_dir,
)
}
/// Forward to the document's `to_html` for this page, passing every option
/// through unchanged.
#[pyo3(signature = (preserve_layout=false, detect_headings=true, include_images=false, image_output_dir=None, embed_images=true, include_form_fields=true))]
fn html(
&self,
py: Python<'_>,
preserve_layout: bool,
detect_headings: bool,
include_images: bool,
image_output_dir: Option<String>,
embed_images: bool,
include_form_fields: bool,
) -> PyResult<String> {
self.doc.borrow_mut(py).to_html(
self.page_index,
preserve_layout,
detect_headings,
include_images,
image_output_dir,
embed_images,
include_form_fields,
)
}
/// Forward to the document's `render_page` for this page and return the
/// resulting bytes; every option is passed through unchanged.
#[pyo3(signature = (
dpi=None,
format=None,
background=None,
transparent=false,
render_annotations=None,
jpeg_quality=None,
))]
#[allow(clippy::too_many_arguments)]
fn render(
&self,
py: Python<'_>,
dpi: Option<u32>,
format: Option<&str>,
background: Option<(f32, f32, f32, f32)>,
transparent: bool,
render_annotations: Option<bool>,
jpeg_quality: Option<u8>,
) -> PyResult<Vec<u8>> {
self.doc.borrow_mut(py).render_page(
self.page_index,
dpi,
format,
background,
transparent,
render_annotations,
jpeg_quality,
)
}
/// Forward to the document's `search_page` for this page; `max_results`
/// defaults to 100 and the boolean flags default to `false`.
#[pyo3(signature = (pattern, case_insensitive=false, literal=false, whole_word=false, max_results=100))]
fn search(
&self,
py: Python<'_>,
pattern: &str,
case_insensitive: bool,
literal: bool,
whole_word: bool,
max_results: usize,
) -> PyResult<Py<PyAny>> {
self.doc.borrow_mut(py).search_page(
py,
self.page_index,
pattern,
case_insensitive,
literal,
whole_word,
max_results,
)
}
/// Create a `PdfPageRegion` on this page from `Rect::new(x, y, width, height)`.
/// The region shares the same document reference.
fn region(&self, py: Python<'_>, x: f32, y: f32, width: f32, height: f32) -> PyPdfPageRegion {
PyPdfPageRegion {
doc: self.doc.clone_ref(py),
page_index: self.page_index,
region: crate::geometry::Rect::new(x, y, width, height),
}
}
/// Python `repr(page)`: shows the page index.
fn __repr__(&self) -> String {
format!("Page(index={})", self.page_index)
}
}
/// Python wrapper around an extracted form field, exposed as `FormField`.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "FormField")]
pub struct PyFormField {
// The wrapped Rust form field.
inner: RustFormField,
}
#[pymethods]
impl PyFormField {
    /// The field's `full_name`.
    #[allow(clippy::misnamed_getters)]
    #[getter]
    fn name(&self) -> &str {
        &self.inner.full_name
    }

    /// Field kind as a lowercase string: `text`, `button`, `choice`,
    /// `signature` or `unknown`.
    #[getter]
    fn field_type(&self) -> &str {
        let label = match self.inner.field_type {
            RustFieldType::Text => "text",
            RustFieldType::Button => "button",
            RustFieldType::Choice => "choice",
            RustFieldType::Signature => "signature",
            RustFieldType::Unknown(_) => "unknown",
        };
        label
    }

    /// The field value converted with `field_value_to_python`.
    #[getter]
    fn value(&self, py: Python<'_>) -> PyResult<Py<PyAny>> {
        let current = &self.inner.value;
        field_value_to_python(current, py)
    }

    /// The field's tooltip, if any.
    #[getter]
    fn tooltip(&self) -> Option<&str> {
        self.inner.tooltip.as_deref()
    }

    /// The four components of the field's bounds as a tuple, if present.
    #[getter]
    fn bounds(&self) -> Option<(f64, f64, f64, f64)> {
        let rect = self.inner.bounds?;
        Some((rect[0], rect[1], rect[2], rect[3]))
    }

    /// Raw field flags, if present.
    #[getter]
    fn flags(&self) -> Option<u32> {
        self.inner.flags
    }

    /// The field's `max_length`, if present.
    #[getter]
    fn max_length(&self) -> Option<u32> {
        self.inner.max_length
    }

    /// True when flags are present and the `READ_ONLY` bit is set.
    #[getter]
    fn is_readonly(&self) -> bool {
        matches!(self.inner.flags, Some(bits) if bits & field_flags::READ_ONLY != 0)
    }

    /// True when flags are present and the `REQUIRED` bit is set.
    #[getter]
    fn is_required(&self) -> bool {
        matches!(self.inner.flags, Some(bits) if bits & field_flags::REQUIRED != 0)
    }

    /// Python `repr(field)`: shows the full name and the field type string.
    fn __repr__(&self) -> String {
        let kind = self.field_type();
        format!("FormField(name=\"{}\", type=\"{}\")", self.inner.full_name, kind)
    }
}
/// Convert an extracted field value into a Python object.
///
/// `Text`, `Name`, `Boolean` and `Array` payloads are converted with
/// `into_pyobject`; `None` becomes Python `None`.
fn field_value_to_python(value: &RustFieldValue, py: Python<'_>) -> PyResult<Py<PyAny>> {
    let object = match value {
        RustFieldValue::None => py.None(),
        RustFieldValue::Text(text) => text.into_pyobject(py)?.into_any().unbind(),
        RustFieldValue::Name(name) => name.into_pyobject(py)?.into_any().unbind(),
        RustFieldValue::Boolean(flag) => flag.into_pyobject(py)?.to_owned().into_any().unbind(),
        RustFieldValue::Array(items) => items.into_pyobject(py)?.into_any().unbind(),
    };
    Ok(object)
}
/// Convert an editor form-field value into a Python object.
///
/// `Text`, `Choice`, `Boolean` and `MultiChoice` payloads are converted with
/// `into_pyobject`; `None` becomes Python `None`.
fn form_field_value_to_python(
    value: &crate::editor::form_fields::FormFieldValue,
    py: Python<'_>,
) -> PyResult<Py<PyAny>> {
    use crate::editor::form_fields::FormFieldValue;
    let object = match value {
        FormFieldValue::None => py.None(),
        FormFieldValue::Text(text) => text.into_pyobject(py)?.into_any().unbind(),
        FormFieldValue::Choice(choice) => choice.into_pyobject(py)?.into_any().unbind(),
        FormFieldValue::Boolean(flag) => flag.into_pyobject(py)?.to_owned().into_any().unbind(),
        FormFieldValue::MultiChoice(choices) => choices.into_pyobject(py)?.into_any().unbind(),
    };
    Ok(object)
}
/// Convert a Python object into an editor form-field value.
///
/// Conversions are attempted in this order: `bool` → `Boolean`, `str` → `Text`,
/// list of `str` → `MultiChoice`, `None` → `None`. Anything else raises
/// `RuntimeError`.
fn python_to_form_field_value(
    value: &Bound<'_, PyAny>,
) -> PyResult<crate::editor::form_fields::FormFieldValue> {
    use crate::editor::form_fields::FormFieldValue;
    if let Ok(flag) = value.extract::<bool>() {
        return Ok(FormFieldValue::Boolean(flag));
    }
    if let Ok(text) = value.extract::<String>() {
        return Ok(FormFieldValue::Text(text));
    }
    if let Ok(choices) = value.extract::<Vec<String>>() {
        return Ok(FormFieldValue::MultiChoice(choices));
    }
    if value.is_none() {
        return Ok(FormFieldValue::None);
    }
    Err(PyRuntimeError::new_err("Invalid value."))
}
/// In-memory PDF held as raw bytes, exposed to Python as `Pdf`.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "Pdf", skip_from_py_object)]
pub struct PyPdf {
// Bytes of the PDF; written by `save` and returned by `to_bytes`.
bytes: Vec<u8>,
}
#[pymethods]
impl PyPdf {
#[staticmethod]
#[pyo3(signature = (content, title=None, author=None))]
fn from_markdown(content: &str, title: Option<&str>, author: Option<&str>) -> PyResult<Self> {
let mut b = RustPdfBuilder::new();
if let Some(t) = title {
b = b.title(t);
}
if let Some(a) = author {
b = b.author(a);
}
let pdf = b
.from_markdown(content)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
Ok(PyPdf {
bytes: pdf.into_bytes(),
})
}
#[staticmethod]
#[pyo3(signature = (content, title=None, author=None))]
fn from_html(content: &str, title: Option<&str>, author: Option<&str>) -> PyResult<Self> {
let mut b = RustPdfBuilder::new();
if let Some(t) = title {
b = b.title(t);
}
if let Some(a) = author {
b = b.author(a);
}
let pdf = b
.from_html(content)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
Ok(PyPdf {
bytes: pdf.into_bytes(),
})
}
#[staticmethod]
#[pyo3(signature = (content, title=None, author=None))]
fn from_text(content: &str, title: Option<&str>, author: Option<&str>) -> PyResult<Self> {
let mut b = RustPdfBuilder::new();
if let Some(t) = title {
b = b.title(t);
}
if let Some(a) = author {
b = b.author(a);
}
let pdf = b
.from_text(content)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
Ok(PyPdf {
bytes: pdf.into_bytes(),
})
}
#[staticmethod]
#[pyo3(signature = (content, template, title=None, author=None))]
fn from_markdown_with_template(
content: &str,
template: &PyPageTemplate,
title: Option<&str>,
author: Option<&str>,
) -> PyResult<Self> {
let mut b = RustPdfBuilder::new().template(template.inner.clone());
if let Some(t) = title {
b = b.title(t);
}
if let Some(a) = author {
b = b.author(a);
}
let pdf = b
.from_markdown(content)
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
Ok(PyPdf {
bytes: pdf.into_bytes(),
})
}
#[staticmethod]
fn from_html_css(html: &str, css: &str, font_bytes: &Bound<'_, PyBytes>) -> PyResult<Self> {
let bytes = font_bytes.as_bytes().to_vec();
let pdf = crate::api::Pdf::from_html_css(html, css, bytes)
.map_err(|e| PyRuntimeError::new_err(format!("from_html_css failed: {e}")))?;
Ok(PyPdf {
bytes: pdf.into_bytes(),
})
}
#[staticmethod]
fn from_html_css_with_fonts(
html: &str,
css: &str,
fonts: Vec<(String, Bound<'_, PyBytes>)>,
) -> PyResult<Self> {
if fonts.is_empty() {
return Err(PyValueError::new_err("at least one font must be provided"));
}
let font_vec: Vec<(String, Vec<u8>)> = fonts
.into_iter()
.map(|(name, b)| (name, b.as_bytes().to_vec()))
.collect();
let pdf = crate::api::Pdf::from_html_css_with_fonts(html, css, font_vec).map_err(|e| {
PyRuntimeError::new_err(format!("from_html_css_with_fonts failed: {e}"))
})?;
Ok(PyPdf {
bytes: pdf.into_bytes(),
})
}
fn save(&self, path: &str) -> PyResult<()> {
std::fs::write(path, &self.bytes).map_err(|e| PyIOError::new_err(e.to_string()))
}
fn to_bytes<'py>(&self, py: Python<'py>) -> Py<PyBytes> {
PyBytes::new(py, &self.bytes).unbind()
}
#[staticmethod]
fn from_image(path: &str) -> PyResult<Self> {
use crate::api::Pdf;
let pdf = Pdf::from_image(path).map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
Ok(PyPdf {
bytes: pdf.into_bytes(),
})
}
#[staticmethod]
fn from_images(paths: Vec<String>) -> PyResult<Self> {
use crate::api::Pdf;
let pdf = Pdf::from_images(&paths).map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
Ok(PyPdf {
bytes: pdf.into_bytes(),
})
}
#[staticmethod]
fn from_image_bytes(data: &Bound<'_, PyBytes>) -> PyResult<Self> {
use crate::api::Pdf;
let pdf = Pdf::from_image_bytes(data.as_bytes())
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
Ok(PyPdf {
bytes: pdf.into_bytes(),
})
}
#[staticmethod]
fn from_bytes(data: &Bound<'_, PyBytes>) -> PyResult<Self> {
let mut pdf = crate::api::Pdf::from_bytes(data.as_bytes().to_vec())
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
let bytes = pdf
.save_to_bytes()
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
Ok(PyPdf { bytes })
}
#[staticmethod]
fn merge(paths: Vec<String>) -> PyResult<Self> {
let bytes =
crate::api::merge_pdfs(&paths).map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
Ok(PyPdf { bytes })
}
fn __len__(&self) -> usize {
self.bytes.len()
}
fn __repr__(&self) -> String {
format!("Pdf({} bytes)", self.bytes.len())
}
}
#[cfg(feature = "office")]
use crate::converters::office::OfficeConverter as RustOfficeConverter;
/// Office-document converter exposed to Python as `OfficeConverter`
/// (definition used when the `office` feature is enabled).
#[cfg(feature = "office")]
#[pyclass(
module = "pdf_oxide.pdf_oxide",
name = "OfficeConverter",
skip_from_py_object
)]
pub struct PyOfficeConverter;
/// Placeholder `OfficeConverter` class used when the `office` feature is
/// disabled, so the Python name still exists.
#[cfg(not(feature = "office"))]
#[pyclass(
module = "pdf_oxide.pdf_oxide",
name = "OfficeConverter",
skip_from_py_object
)]
pub struct PyOfficeConverter;
// Stub API for builds without the `office` feature: the constructor and every
// listed static method accept any arguments and always raise `RuntimeError`.
#[cfg(not(feature = "office"))]
#[pymethods]
impl PyOfficeConverter {
/// Always raises `RuntimeError`: the office feature is not enabled.
#[new]
fn new() -> PyResult<Self> {
Err(PyRuntimeError::new_err("Office feature not enabled."))
}
/// Always raises `RuntimeError`, whatever arguments are passed.
#[staticmethod]
#[pyo3(signature = (*_args, **_kwargs))]
fn convert(
_args: &Bound<'_, PyTuple>,
_kwargs: Option<Bound<'_, PyDict>>,
) -> PyResult<Py<PyAny>> {
Err(PyRuntimeError::new_err("Office feature not enabled."))
}
/// Always raises `RuntimeError`, whatever arguments are passed.
#[staticmethod]
#[pyo3(signature = (*_args, **_kwargs))]
fn from_docx(
_args: &Bound<'_, PyTuple>,
_kwargs: Option<Bound<'_, PyDict>>,
) -> PyResult<Py<PyAny>> {
Err(PyRuntimeError::new_err("Office feature not enabled."))
}
/// Always raises `RuntimeError`, whatever arguments are passed.
#[staticmethod]
#[pyo3(signature = (*_args, **_kwargs))]
fn from_xlsx(
_args: &Bound<'_, PyTuple>,
_kwargs: Option<Bound<'_, PyDict>>,
) -> PyResult<Py<PyAny>> {
Err(PyRuntimeError::new_err("Office feature not enabled."))
}
/// Always raises `RuntimeError`, whatever arguments are passed.
#[staticmethod]
#[pyo3(signature = (*_args, **_kwargs))]
fn from_pptx(
_args: &Bound<'_, PyTuple>,
_kwargs: Option<Bound<'_, PyDict>>,
) -> PyResult<Py<PyAny>> {
Err(PyRuntimeError::new_err("Office feature not enabled."))
}
}
#[cfg(feature = "office")]
#[pymethods]
impl PyOfficeConverter {
    /// Create a converter object.
    #[new]
    fn new() -> Self {
        PyOfficeConverter
    }

    /// Convert the DOCX file at `path` to a `Pdf`; failures raise `RuntimeError`.
    #[staticmethod]
    fn from_docx(path: &str) -> PyResult<PyPdf> {
        match RustOfficeConverter::new().convert_docx(path) {
            Ok(bytes) => Ok(PyPdf { bytes }),
            Err(err) => Err(PyRuntimeError::new_err(err.to_string())),
        }
    }

    /// Convert in-memory DOCX bytes to a `Pdf`; failures raise `RuntimeError`.
    #[staticmethod]
    fn from_docx_bytes(data: &Bound<'_, PyBytes>) -> PyResult<PyPdf> {
        match RustOfficeConverter::new().convert_docx_bytes(data.as_bytes()) {
            Ok(bytes) => Ok(PyPdf { bytes }),
            Err(err) => Err(PyRuntimeError::new_err(err.to_string())),
        }
    }

    /// Convert the XLSX file at `path` to a `Pdf`; failures raise `RuntimeError`.
    #[staticmethod]
    fn from_xlsx(path: &str) -> PyResult<PyPdf> {
        match RustOfficeConverter::new().convert_xlsx(path) {
            Ok(bytes) => Ok(PyPdf { bytes }),
            Err(err) => Err(PyRuntimeError::new_err(err.to_string())),
        }
    }

    /// Convert in-memory XLSX bytes to a `Pdf`; failures raise `RuntimeError`.
    #[staticmethod]
    fn from_xlsx_bytes(data: &Bound<'_, PyBytes>) -> PyResult<PyPdf> {
        match RustOfficeConverter::new().convert_xlsx_bytes(data.as_bytes()) {
            Ok(bytes) => Ok(PyPdf { bytes }),
            Err(err) => Err(PyRuntimeError::new_err(err.to_string())),
        }
    }

    /// Convert the PPTX file at `path` to a `Pdf`; failures raise `RuntimeError`.
    #[staticmethod]
    fn from_pptx(path: &str) -> PyResult<PyPdf> {
        match RustOfficeConverter::new().convert_pptx(path) {
            Ok(bytes) => Ok(PyPdf { bytes }),
            Err(err) => Err(PyRuntimeError::new_err(err.to_string())),
        }
    }

    /// Convert in-memory PPTX bytes to a `Pdf`; failures raise `RuntimeError`.
    #[staticmethod]
    fn from_pptx_bytes(data: &Bound<'_, PyBytes>) -> PyResult<PyPdf> {
        match RustOfficeConverter::new().convert_pptx_bytes(data.as_bytes()) {
            Ok(bytes) => Ok(PyPdf { bytes }),
            Err(err) => Err(PyRuntimeError::new_err(err.to_string())),
        }
    }

    /// Convert the file at `path` with the converter's generic `convert`
    /// entry point; failures raise `RuntimeError`.
    #[staticmethod]
    fn convert(path: &str) -> PyResult<PyPdf> {
        match RustOfficeConverter::new().convert(path) {
            Ok(bytes) => Ok(PyPdf { bytes }),
            Err(err) => Err(PyRuntimeError::new_err(err.to_string())),
        }
    }
}
use crate::editor::{ElementId, PdfElement, PdfPage as RustPdfPage, PdfText as RustPdfText};
/// Rectangular area of one page of a `PdfDocument`, exposed to Python as
/// `PdfPageRegion`.
#[pyclass(
module = "pdf_oxide.pdf_oxide",
name = "PdfPageRegion",
skip_from_py_object
)]
pub struct PyPdfPageRegion {
// Document the region belongs to.
pub doc: Py<PyPdfDocument>,
// Index of the page the region lies on.
pub page_index: usize,
// The rectangle handed to the region-limited extraction calls.
pub region: crate::geometry::Rect,
}
#[pymethods]
impl PyPdfPageRegion {
    /// The region as `(x, y, width, height)`.
    #[getter]
    fn bbox(&self) -> (f32, f32, f32, f32) {
        let rect = &self.region;
        (rect.x, rect.y, rect.width, rect.height)
    }

    /// Result of the document's `extract_text` for this page, limited to the region.
    fn extract_text(&self, py: Python<'_>) -> PyResult<String> {
        let mut document = self.doc.bind(py).borrow_mut();
        let area = Some(self.bbox());
        document.extract_text(self.page_index, area)
    }

    /// Result of the document's `extract_words` for this page, limited to the region.
    fn extract_words(&self, py: Python<'_>) -> PyResult<Vec<PyWord>> {
        let mut document = self.doc.bind(py).borrow_mut();
        let area = Some(self.bbox());
        document.extract_words(self.page_index, area, None, None)
    }

    /// Result of the document's `extract_text_lines` for this page, limited to the region.
    fn extract_text_lines(&self, py: Python<'_>) -> PyResult<Vec<PyTextLine>> {
        let mut document = self.doc.bind(py).borrow_mut();
        let area = Some(self.bbox());
        document.extract_text_lines(self.page_index, area, None, None, None)
    }

    /// Result of the document's `extract_tables` for this page, limited to the
    /// region; `table_settings` is passed through unchanged.
    #[pyo3(signature = (table_settings=None))]
    fn extract_tables(
        &self,
        py: Python<'_>,
        table_settings: Option<Bound<'_, pyo3::types::PyDict>>,
    ) -> PyResult<Py<PyAny>> {
        let mut document = self.doc.bind(py).borrow_mut();
        let area = Some(self.bbox());
        document.extract_tables(py, self.page_index, area, table_settings)
    }

    /// Result of the document's `extract_images` for this page, limited to the region.
    fn extract_images(&self, py: Python<'_>) -> PyResult<Py<PyAny>> {
        let mut document = self.doc.bind(py).borrow_mut();
        let area = Some(self.bbox());
        document.extract_images(py, self.page_index, area)
    }

    /// Paths inside the region, each converted with `path_to_py_dict`.
    /// Extraction failures raise `RuntimeError`.
    fn extract_paths(&self, py: Python<'_>) -> PyResult<Py<PyAny>> {
        let mut document = self.doc.bind(py).borrow_mut();
        let shapes = document
            .inner
            .extract_paths_in_rect(self.page_index, self.region)
            .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
        let out = pyo3::types::PyList::empty(py);
        for shape in &shapes {
            let as_dict = path_to_py_dict(py, shape)?;
            out.append(as_dict)?;
        }
        Ok(out.into())
    }

    /// Python `repr(region)`: shows the page index and the `Debug` form of the rectangle.
    fn __repr__(&self) -> String {
        format!("PdfPageRegion(page={}, bbox={:?})", self.page_index, self.region)
    }
}
/// Python wrapper around the editor's page type, exposed as `PdfPage`.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "PdfPage")]
pub struct PyPdfPage {
// The wrapped editor page.
inner: RustPdfPage,
}
#[pymethods]
impl PyPdfPage {
    /// Index of the page inside its document.
    #[getter]
    fn index(&self) -> usize {
        self.inner.page_index
    }

    /// Width of the page as stored on the wrapped page.
    #[getter]
    fn width(&self) -> f32 {
        self.inner.width
    }

    /// Height of the page as stored on the wrapped page.
    #[getter]
    fn height(&self) -> f32 {
        self.inner.height
    }

    /// Return the page's child elements, each wrapped for Python.
    fn children(&self) -> Vec<PyPdfElement> {
        self.inner
            .children()
            .into_iter()
            .map(|e| PyPdfElement { inner: e })
            .collect()
    }

    /// Return the text elements reported by the page's `find_text_containing`
    /// for `needle`.
    fn find_text_containing(&self, needle: &str) -> Vec<PyPdfText> {
        self.inner
            .find_text_containing(needle)
            .into_iter()
            .map(|t| PyPdfText { inner: t })
            .collect()
    }

    /// Return the image elements reported by the page's `find_images`.
    fn find_images(&self) -> Vec<PyPdfImage> {
        self.inner
            .find_images()
            .into_iter()
            .map(|i| PyPdfImage { inner: i })
            .collect()
    }

    /// Look up an element by id.
    ///
    /// NOTE(review): this is currently a stub that ignores `_id` and always
    /// returns `None`.
    fn get_element(&self, _id: &str) -> Option<PyPdfElement> {
        None
    }

    /// Replace the text of the element identified by `text_id`; failures raise
    /// `RuntimeError`.
    fn set_text(&mut self, text_id: &PyPdfTextId, new_text: &str) -> PyResult<()> {
        self.inner
            .set_text(text_id.inner, new_text)
            .map_err(|e| PyRuntimeError::new_err(e.to_string()))
    }

    /// Return clones of the page's annotations, each wrapped for Python.
    fn annotations(&self) -> Vec<PyAnnotationWrapper> {
        self.inner
            .annotations()
            .iter()
            .map(|a| PyAnnotationWrapper { inner: a.clone() })
            .collect()
    }

    /// Add a URI link annotation covering `Rect::new(x, y, width, height)`.
    ///
    /// Returns the `Debug` representation of the value produced by
    /// `add_annotation`.
    fn add_link(&mut self, x: f32, y: f32, width: f32, height: f32, url: &str) -> String {
        use crate::writer::LinkAnnotation;
        let l = LinkAnnotation::uri(crate::geometry::Rect::new(x, y, width, height), url);
        format!("{:?}", self.inner.add_annotation(l))
    }

    /// Add a highlight annotation covering `Rect::new(x, y, width, height)`,
    /// colored with the three components of `color`.
    ///
    /// Returns the `Debug` representation of the value produced by
    /// `add_annotation`.
    fn add_highlight(
        &mut self,
        x: f32,
        y: f32,
        width: f32,
        height: f32,
        color: (f32, f32, f32),
    ) -> String {
        use crate::writer::TextMarkupAnnotation;
        use crate::TextMarkupType;
        let l = TextMarkupAnnotation::from_rect(
            TextMarkupType::Highlight,
            crate::geometry::Rect::new(x, y, width, height),
        )
        .with_color(color.0, color.1, color.2);
        format!("{:?}", self.inner.add_annotation(l))
    }

    /// Add a text annotation carrying `text` in a fixed 24x24 rectangle at
    /// `(x, y)`.
    ///
    /// Returns the `Debug` representation of the value produced by
    /// `add_annotation`.
    fn add_note(&mut self, x: f32, y: f32, text: &str) -> String {
        use crate::writer::TextAnnotation;
        let l = TextAnnotation::new(crate::geometry::Rect::new(x, y, 24.0, 24.0), text);
        format!("{:?}", self.inner.add_annotation(l))
    }

    /// Remove the annotation at `index`; returns whether one was removed.
    fn remove_annotation(&mut self, index: usize) -> bool {
        self.inner.remove_annotation(index).is_some()
    }

    /// Add a Helvetica text element at `(x, y)` and return its id.
    ///
    /// The bounding box is an estimate: 0.6 × `font_size` per character wide
    /// and `font_size` high.
    #[pyo3(signature = (text, x, y, font_size=12.0))]
    fn add_text(&mut self, text: &str, x: f32, y: f32, font_size: f32) -> PyPdfTextId {
        use crate::elements::{FontSpec, TextContent, TextStyle};
        // Count characters rather than UTF-8 bytes: `str::len` would inflate the
        // estimated width for any non-ASCII text.
        let estimated_width = text.chars().count() as f32 * font_size * 0.6;
        let c = TextContent {
            text: text.to_string(),
            bbox: crate::geometry::Rect::new(x, y, estimated_width, font_size),
            font: FontSpec {
                name: "Helvetica".to_string(),
                size: font_size,
            },
            style: TextStyle::default(),
            reading_order: None,
            artifact_type: None,
            origin: None,
            rotation_degrees: None,
            matrix: None,
        };
        PyPdfTextId {
            inner: self.inner.add_text(c),
        }
    }

    /// Remove the element identified by `id`; returns the page's own result flag.
    fn remove_element(&mut self, id: &PyPdfTextId) -> bool {
        self.inner.remove_element(id.inner)
    }

    /// Python `repr(page)`: shows the index and the size rounded to one decimal.
    fn __repr__(&self) -> String {
        format!(
            "PdfPage(index={}, width={:.1}, height={:.1})",
            self.inner.page_index, self.inner.width, self.inner.height
        )
    }
}
// Opaque handle identifying an element on a page (wraps `ElementId`).
#[pyclass(
    module = "pdf_oxide.pdf_oxide",
    name = "PdfTextId",
    skip_from_py_object
)]
#[derive(Clone)]
pub struct PyPdfTextId {
    inner: ElementId,
}

#[pymethods]
impl PyPdfTextId {
    // Shows the wrapped id using its `Debug` output.
    fn __repr__(&self) -> String {
        let id = &self.inner;
        format!("PdfTextId({:?})", id)
    }
}
// Read-only view of a text element on a page.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "PdfText", skip_from_py_object)]
#[derive(Clone)]
pub struct PyPdfText {
    inner: RustPdfText,
}

#[pymethods]
impl PyPdfText {
    // Id handle usable with the page-level editing methods.
    #[getter]
    fn id(&self) -> PyPdfTextId {
        let inner = self.inner.id();
        PyPdfTextId { inner }
    }

    // The element's text as an owned string.
    #[getter]
    fn value(&self) -> String {
        let content = self.inner.text();
        content.to_string()
    }

    // Same content as `value`.
    #[getter]
    fn text(&self) -> String {
        self.inner.text().to_string()
    }

    // Bounding box as (x, y, width, height).
    #[getter]
    fn bbox(&self) -> (f32, f32, f32, f32) {
        let rect = self.inner.bbox();
        let (x, y) = (rect.x, rect.y);
        let (w, h) = (rect.width, rect.height);
        (x, y, w, h)
    }

    #[getter]
    fn font_name(&self) -> String {
        let name = self.inner.font_name();
        name.to_string()
    }

    #[getter]
    fn font_size(&self) -> f32 {
        let size = self.inner.font_size();
        size
    }

    #[getter]
    fn is_bold(&self) -> bool {
        let bold = self.inner.is_bold();
        bold
    }

    #[getter]
    fn is_italic(&self) -> bool {
        let italic = self.inner.is_italic();
        italic
    }

    // Substring / prefix / suffix tests are delegated to the wrapped text.
    fn contains(&self, n: &str) -> bool {
        let hit = self.inner.contains(n);
        hit
    }

    fn starts_with(&self, p: &str) -> bool {
        let hit = self.inner.starts_with(p);
        hit
    }

    fn ends_with(&self, s: &str) -> bool {
        let hit = self.inner.ends_with(s);
        hit
    }

    fn __repr__(&self) -> String {
        let shown = self.inner.text();
        format!("PdfText({:?})", shown)
    }
}
// Read-only view of an image element on a page.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "PdfImage", skip_from_py_object)]
#[derive(Clone)]
pub struct PyPdfImage {
    inner: crate::editor::PdfImage,
}

#[pymethods]
impl PyPdfImage {
    // Bounding box as (x, y, width, height).
    #[getter]
    fn bbox(&self) -> (f32, f32, f32, f32) {
        let rect = self.inner.bbox();
        let (x, y) = (rect.x, rect.y);
        let (w, h) = (rect.width, rect.height);
        (x, y, w, h)
    }

    // First component of `dimensions()`.
    #[getter]
    fn width(&self) -> u32 {
        let (w, _) = self.inner.dimensions();
        w
    }

    // Second component of `dimensions()`.
    #[getter]
    fn height(&self) -> u32 {
        let (_, h) = self.inner.dimensions();
        h
    }

    #[getter]
    fn aspect_ratio(&self) -> f32 {
        let ratio = self.inner.aspect_ratio();
        ratio
    }

    fn __repr__(&self) -> String {
        let dims = self.inner.dimensions();
        format!("PdfImage({}x{})", dims.0, dims.1)
    }
}
// Read-only view of a page annotation.
#[pyclass(
    module = "pdf_oxide.pdf_oxide",
    name = "PdfAnnotation",
    skip_from_py_object
)]
#[derive(Clone)]
pub struct PyAnnotationWrapper {
    inner: crate::editor::AnnotationWrapper,
}

#[pymethods]
impl PyAnnotationWrapper {
    // Annotation subtype rendered through its `Debug` implementation.
    #[getter]
    fn subtype(&self) -> String {
        let kind = self.inner.subtype();
        format!("{:?}", kind)
    }

    // Annotation rectangle as (x, y, width, height).
    #[getter]
    fn rect(&self) -> (f32, f32, f32, f32) {
        let area = self.inner.rect();
        let (x, y) = (area.x, area.y);
        let (w, h) = (area.width, area.height);
        (x, y, w, h)
    }

    // Owned copy of the annotation contents, when present.
    #[getter]
    fn contents(&self) -> Option<String> {
        let raw = self.inner.contents();
        raw.map(|body| body.to_string())
    }

    #[getter]
    fn color(&self) -> Option<(f32, f32, f32)> {
        let rgb = self.inner.color();
        rgb
    }

    #[getter]
    fn is_modified(&self) -> bool {
        let modified = self.inner.is_modified();
        modified
    }

    #[getter]
    fn is_new(&self) -> bool {
        let fresh = self.inner.is_new();
        fresh
    }

    fn __repr__(&self) -> String {
        let kind = self.inner.subtype();
        format!("PdfAnnotation(subtype={:?})", kind)
    }
}
// Generic page element; use the `is_*` / `as_*` methods to discriminate.
#[pyclass(
    module = "pdf_oxide.pdf_oxide",
    name = "PdfElement",
    skip_from_py_object
)]
#[derive(Clone)]
pub struct PyPdfElement {
    inner: PdfElement,
}

#[pymethods]
impl PyPdfElement {
    fn is_text(&self) -> bool {
        let answer = self.inner.is_text();
        answer
    }

    fn is_image(&self) -> bool {
        let answer = self.inner.is_image();
        answer
    }

    fn is_path(&self) -> bool {
        let answer = self.inner.is_path();
        answer
    }

    fn is_table(&self) -> bool {
        let answer = self.inner.is_table();
        answer
    }

    fn is_structure(&self) -> bool {
        let answer = self.inner.is_structure();
        answer
    }

    // Clone of the text payload when this element is the `Text` variant.
    fn as_text(&self) -> Option<PyPdfText> {
        match &self.inner {
            PdfElement::Text(text) => Some(PyPdfText {
                inner: text.clone(),
            }),
            _ => None,
        }
    }

    // Clone of the image payload when this element is the `Image` variant.
    fn as_image(&self) -> Option<PyPdfImage> {
        match &self.inner {
            PdfElement::Image(image) => Some(PyPdfImage {
                inner: image.clone(),
            }),
            _ => None,
        }
    }

    // Bounding box as (x, y, width, height).
    #[getter]
    fn bbox(&self) -> (f32, f32, f32, f32) {
        let rect = self.inner.bbox();
        let (x, y) = (rect.x, rect.y);
        let (w, h) = (rect.width, rect.height);
        (x, y, w, h)
    }

    fn __repr__(&self) -> String {
        String::from("PdfElement")
    }
}
// Read-only view of a single extracted character (`RustTextChar`).
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "TextChar", skip_from_py_object)]
#[derive(Clone)]
pub struct PyTextChar {
    inner: RustTextChar,
}

#[pymethods]
impl PyTextChar {
    #[getter]
    fn char(&self) -> char {
        let ch = self.inner.char;
        ch
    }

    // Bounding box as (x, y, width, height).
    #[getter]
    fn bbox(&self) -> (f32, f32, f32, f32) {
        let rect = &self.inner.bbox;
        (rect.x, rect.y, rect.width, rect.height)
    }

    #[getter]
    fn font_name(&self) -> String {
        let name = &self.inner.font_name;
        name.clone()
    }

    #[getter]
    fn font_size(&self) -> f32 {
        let size = self.inner.font_size;
        size
    }

    // Font weight rendered through its `Debug` implementation.
    #[getter]
    fn font_weight(&self) -> String {
        let weight = &self.inner.font_weight;
        format!("{:?}", weight)
    }

    #[getter]
    fn is_italic(&self) -> bool {
        let italic = self.inner.is_italic;
        italic
    }

    #[getter]
    fn is_monospace(&self) -> bool {
        let mono = self.inner.is_monospace;
        mono
    }

    // Colour as an (r, g, b) triple.
    #[getter]
    fn color(&self) -> (f32, f32, f32) {
        let rgb = &self.inner.color;
        (rgb.r, rgb.g, rgb.b)
    }

    #[getter]
    fn rotation_degrees(&self) -> f32 {
        let degrees = self.inner.rotation_degrees;
        degrees
    }

    #[getter]
    fn origin_x(&self) -> f32 {
        let x = self.inner.origin_x;
        x
    }

    #[getter]
    fn origin_y(&self) -> f32 {
        let y = self.inner.origin_y;
        y
    }

    #[getter]
    fn advance_width(&self) -> f32 {
        let advance = self.inner.advance_width;
        advance
    }

    #[getter]
    fn mcid(&self) -> Option<u32> {
        let id = self.inner.mcid;
        id
    }
}
// Read-only view of an extracted text span.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "TextSpan", skip_from_py_object)]
#[derive(Clone)]
pub struct PyTextSpan {
    inner: crate::layout::TextSpan,
}

#[pymethods]
impl PyTextSpan {
    #[getter]
    fn text(&self) -> &str {
        let span = &self.inner;
        &span.text
    }

    // Bounding box as (x, y, width, height).
    #[getter]
    fn bbox(&self) -> (f32, f32, f32, f32) {
        let rect = &self.inner.bbox;
        (rect.x, rect.y, rect.width, rect.height)
    }

    #[getter]
    fn font_name(&self) -> &str {
        let span = &self.inner;
        &span.font_name
    }

    #[getter]
    fn font_size(&self) -> f32 {
        let size = self.inner.font_size;
        size
    }

    // Bold when the numeric value of the font weight is at least 700.
    #[getter]
    fn is_bold(&self) -> bool {
        let weight = self.inner.font_weight as u16;
        weight >= 700
    }

    #[getter]
    fn is_italic(&self) -> bool {
        let italic = self.inner.is_italic;
        italic
    }

    #[getter]
    fn is_monospace(&self) -> bool {
        let mono = self.inner.is_monospace;
        mono
    }

    // Copy of the span's `char_widths` values.
    #[getter]
    fn char_widths(&self) -> Vec<f32> {
        let widths = &self.inner.char_widths;
        widths.clone()
    }

    // Colour as an (r, g, b) triple.
    #[getter]
    fn color(&self) -> (f32, f32, f32) {
        let rgb = &self.inner.color;
        (rgb.r, rgb.g, rgb.b)
    }
}
// Read-only view of an extracted word.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "TextWord", skip_from_py_object)]
#[derive(Clone)]
pub struct PyWord {
    inner: crate::layout::Word,
}

#[pymethods]
impl PyWord {
    #[getter]
    fn text(&self) -> String {
        let content = &self.inner.text;
        content.clone()
    }

    // Bounding box as (x, y, width, height).
    #[getter]
    fn bbox(&self) -> (f32, f32, f32, f32) {
        let rect = &self.inner.bbox;
        (rect.x, rect.y, rect.width, rect.height)
    }

    // Backed by the word's `dominant_font` field.
    #[getter]
    fn font_name(&self) -> String {
        let font = &self.inner.dominant_font;
        font.clone()
    }

    // Backed by the word's `avg_font_size` field.
    #[getter]
    fn font_size(&self) -> f32 {
        let size = self.inner.avg_font_size;
        size
    }

    #[getter]
    fn is_bold(&self) -> bool {
        let bold = self.inner.is_bold;
        bold
    }

    #[getter]
    fn is_italic(&self) -> bool {
        let italic = self.inner.is_italic;
        italic
    }

    // Clones of the word's characters, in stored order.
    #[getter]
    fn chars(&self) -> Vec<PyTextChar> {
        let mut wrapped = Vec::new();
        for ch in self.inner.chars.iter() {
            wrapped.push(PyTextChar { inner: ch.clone() });
        }
        wrapped
    }
}
// Read-only view of an extracted line of text.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "TextLine", skip_from_py_object)]
#[derive(Clone)]
pub struct PyTextLine {
    inner: crate::layout::TextLine,
}

#[pymethods]
impl PyTextLine {
    #[getter]
    fn text(&self) -> String {
        let content = &self.inner.text;
        content.clone()
    }

    // Bounding box as (x, y, width, height).
    #[getter]
    fn bbox(&self) -> (f32, f32, f32, f32) {
        let rect = &self.inner.bbox;
        (rect.x, rect.y, rect.width, rect.height)
    }

    // Clones of the line's words, in stored order.
    #[getter]
    fn words(&self) -> Vec<PyWord> {
        let mut wrapped = Vec::new();
        for word in self.inner.words.iter() {
            wrapped.push(PyWord {
                inner: word.clone(),
            });
        }
        wrapped
    }

    // Characters of every word, flattened in word order.
    #[getter]
    fn chars(&self) -> Vec<PyTextChar> {
        let mut wrapped = Vec::new();
        for word in self.inner.words.iter() {
            for ch in word.chars.iter() {
                wrapped.push(PyTextChar { inner: ch.clone() });
            }
        }
        wrapped
    }
}
fn path_to_py_dict(py: Python<'_>, path: &crate::elements::PathContent) -> PyResult<Py<PyAny>> {
let d = pyo3::types::PyDict::new(py);
d.set_item("bbox", (path.bbox.x, path.bbox.y, path.bbox.width, path.bbox.height))?;
d.set_item("stroke_width", path.stroke_width)?;
if let Some(ref c) = path.stroke_color {
d.set_item("stroke_color", (c.r, c.g, c.b))?;
} else {
d.set_item("stroke_color", py.None())?;
}
if let Some(ref c) = path.fill_color {
d.set_item("fill_color", (c.r, c.g, c.b))?;
} else {
d.set_item("fill_color", py.None())?;
}
d.set_item("operations_count", path.operations.len())?;
let ops_list = pyo3::types::PyList::empty(py);
for op in &path.operations {
let op_dict = pyo3::types::PyDict::new(py);
match op {
crate::elements::PathOperation::MoveTo(x, y) => {
op_dict.set_item("op", "move_to")?;
op_dict.set_item("x", *x)?;
op_dict.set_item("y", *y)?;
},
crate::elements::PathOperation::LineTo(x, y) => {
op_dict.set_item("op", "line_to")?;
op_dict.set_item("x", *x)?;
op_dict.set_item("y", *y)?;
},
crate::elements::PathOperation::CurveTo(cx1, cy1, cx2, cy2, x, y) => {
op_dict.set_item("op", "curve_to")?;
op_dict.set_item("cx1", *cx1)?;
op_dict.set_item("cy1", *cy1)?;
op_dict.set_item("cx2", *cx2)?;
op_dict.set_item("cy2", *cy2)?;
op_dict.set_item("x", *x)?;
op_dict.set_item("y", *y)?;
},
crate::elements::PathOperation::Rectangle(x, y, w, h) => {
op_dict.set_item("op", "rectangle")?;
op_dict.set_item("x", *x)?;
op_dict.set_item("y", *y)?;
op_dict.set_item("width", *w)?;
op_dict.set_item("height", *h)?;
},
crate::elements::PathOperation::ClosePath => {
op_dict.set_item("op", "close_path")?;
},
}
ops_list.append(op_dict)?;
}
d.set_item("operations", ops_list)?;
Ok(d.into())
}
/// Build a `TableDetectionConfig` from an optional Python settings dict.
///
/// Starts from `TableDetectionConfig::default()` and overrides only the keys
/// present: `horizontal_strategy`, `vertical_strategy` (each one of
/// `"lines"`, `"text"`, `"both"`), `column_tolerance`, `row_tolerance` and
/// `min_table_cells`. An unknown strategy name raises `RuntimeError`;
/// values of the wrong type propagate the extraction error.
fn table_settings_to_config(
    settings: Option<Bound<'_, pyo3::types::PyDict>>,
) -> PyResult<crate::structure::spatial_table_detector::TableDetectionConfig> {
    use crate::structure::spatial_table_detector::{TableDetectionConfig, TableStrategy};

    // Shared by both strategy keys.
    let parse_strategy = |raw: &str| -> PyResult<TableStrategy> {
        match raw {
            "lines" => Ok(TableStrategy::Lines),
            "text" => Ok(TableStrategy::Text),
            "both" => Ok(TableStrategy::Both),
            _ => Err(PyRuntimeError::new_err("Invalid strategy")),
        }
    };

    let mut config = TableDetectionConfig::default();
    let Some(dict) = settings else {
        return Ok(config);
    };

    if let Some(value) = dict.get_item("horizontal_strategy")? {
        let raw: String = value.extract()?;
        config.horizontal_strategy = parse_strategy(&raw)?;
    }
    if let Some(value) = dict.get_item("vertical_strategy")? {
        let raw: String = value.extract()?;
        config.vertical_strategy = parse_strategy(&raw)?;
    }
    if let Some(value) = dict.get_item("column_tolerance")? {
        config.column_tolerance = value.extract()?;
    }
    if let Some(value) = dict.get_item("row_tolerance")? {
        config.row_tolerance = value.extract()?;
    }
    if let Some(value) = dict.get_item("min_table_cells")? {
        config.min_table_cells = value.extract()?;
    }
    Ok(config)
}
/// Recursively convert outline items into a Python list of dicts.
///
/// Each dict has `title`, `page` (the index for a `PageIndex` destination,
/// otherwise `None`) and `children` (the same structure, nested).
fn outline_items_to_py(
    py: Python<'_>,
    items: &[crate::outline::OutlineItem],
) -> PyResult<Py<PyAny>> {
    let out = pyo3::types::PyList::empty(py);
    for item in items {
        let entry = pyo3::types::PyDict::new(py);
        entry.set_item("title", &item.title)?;
        if let Some(crate::outline::Destination::PageIndex(page)) = &item.dest {
            entry.set_item("page", *page)?;
        } else {
            entry.set_item("page", py.None())?;
        }
        let nested = outline_items_to_py(py, &item.children)?;
        entry.set_item("children", nested)?;
        out.append(entry)?;
    }
    Ok(out.into())
}
// OCR engine wrapper, compiled when the `ocr` feature is enabled.
#[cfg(feature = "ocr")]
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "OcrEngine", unsendable)]
pub struct PyOcrEngine {
    inner: crate::ocr::OcrEngine,
}

// Placeholder used when the `ocr` feature is disabled; it cannot be constructed.
#[cfg(not(feature = "ocr"))]
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "OcrEngine", unsendable)]
pub struct PyOcrEngine {}

#[cfg(not(feature = "ocr"))]
#[pymethods]
impl PyOcrEngine {
    // Accepts any arguments and always raises `RuntimeError`.
    #[new]
    #[pyo3(signature = (*_args, **_kwargs))]
    fn new(_args: &Bound<'_, PyTuple>, _kwargs: Option<Bound<'_, PyDict>>) -> PyResult<Self> {
        let disabled = PyRuntimeError::new_err("OCR not enabled.");
        Err(disabled)
    }
}

#[cfg(feature = "ocr")]
#[pymethods]
impl PyOcrEngine {
    // Create an engine from the three model/dictionary paths. When `config`
    // is omitted the default `OcrConfig` is used. Engine construction
    // failures are raised as `RuntimeError`.
    #[new]
    #[pyo3(signature = (det_model_path, rec_model_path, dict_path, config=None))]
    fn new(
        det_model_path: &str,
        rec_model_path: &str,
        dict_path: &str,
        config: Option<&PyOcrConfig>,
    ) -> PyResult<Self> {
        let settings = match config {
            Some(cfg) => cfg.inner.clone(),
            None => crate::ocr::OcrConfig::default(),
        };
        let engine =
            crate::ocr::OcrEngine::new(det_model_path, rec_model_path, dict_path, settings)
                .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
        Ok(Self { inner: engine })
    }
}
// OCR configuration wrapper, compiled when the `ocr` feature is enabled.
#[cfg(feature = "ocr")]
#[pyclass(
    module = "pdf_oxide.pdf_oxide",
    name = "OcrConfig",
    skip_from_py_object
)]
#[derive(Clone)]
pub struct PyOcrConfig {
    inner: crate::ocr::OcrConfig,
}

// Placeholder used when the `ocr` feature is disabled; it cannot be constructed.
#[cfg(not(feature = "ocr"))]
#[pyclass(
    module = "pdf_oxide.pdf_oxide",
    name = "OcrConfig",
    skip_from_py_object
)]
#[derive(Clone)]
pub struct PyOcrConfig {}

#[cfg(not(feature = "ocr"))]
#[pymethods]
impl PyOcrConfig {
    // Accepts any keyword arguments and always raises `RuntimeError`.
    #[new]
    #[pyo3(signature = (**_kwargs))]
    fn new(_kwargs: Option<Bound<'_, PyDict>>) -> PyResult<Self> {
        let disabled = PyRuntimeError::new_err("OCR not enabled.");
        Err(disabled)
    }
}

#[cfg(feature = "ocr")]
#[pymethods]
impl PyOcrConfig {
    // Start from the default configuration and override only the values
    // that were actually supplied.
    #[new]
    #[pyo3(signature = (det_threshold=None, rec_threshold=None, num_threads=None))]
    fn new(
        det_threshold: Option<f32>,
        rec_threshold: Option<f32>,
        num_threads: Option<usize>,
    ) -> Self {
        let mut settings = crate::ocr::OcrConfig::default();
        if let Some(threshold) = det_threshold {
            settings.det_threshold = threshold;
        }
        if let Some(threshold) = rec_threshold {
            settings.rec_threshold = threshold;
        }
        if let Some(threads) = num_threads {
            settings.num_threads = threads;
        }
        Self { inner: settings }
    }
}
// RGB colour exposed to Python; wraps `RustColor`.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "Color", skip_from_py_object)]
#[derive(Clone)]
pub struct PyColor {
    inner: RustColor,
}

#[pymethods]
impl PyColor {
    // Build a colour from its three components (passed to `RustColor::new`).
    #[new]
    fn new(r: f32, g: f32, b: f32) -> Self {
        PyColor {
            inner: RustColor::new(r, g, b),
        }
    }

    #[staticmethod]
    fn black() -> Self {
        PyColor {
            inner: RustColor::black(),
        }
    }

    #[staticmethod]
    fn white() -> Self {
        PyColor {
            inner: RustColor::white(),
        }
    }

    #[staticmethod]
    fn red() -> Self {
        PyColor {
            inner: RustColor::new(1.0, 0.0, 0.0),
        }
    }

    #[staticmethod]
    fn green() -> Self {
        PyColor {
            inner: RustColor::new(0.0, 1.0, 0.0),
        }
    }

    #[staticmethod]
    fn blue() -> Self {
        PyColor {
            inner: RustColor::new(0.0, 0.0, 1.0),
        }
    }

    // Parse a six-digit hexadecimal colour such as "#ff8000" or "ff8000".
    //
    // Leading '#' characters are stripped; each two-digit channel is scaled
    // to the 0.0..=1.0 range. Raises `RuntimeError` when the remaining string
    // is not exactly six characters long or contains a non-hex character.
    #[staticmethod]
    fn from_hex(hex: &str) -> PyResult<Self> {
        let digits = hex.trim_start_matches('#');
        if digits.len() != 6 {
            return Err(PyRuntimeError::new_err("Invalid hex color length"));
        }
        // Validate up front. This guarantees ASCII, so the byte-range slices
        // below can never split a multi-byte character (which would panic),
        // and it rejects inputs like "+f+f+f" that `from_str_radix` would
        // otherwise accept because it tolerates a leading '+'.
        if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
            return Err(PyRuntimeError::new_err("Invalid hex color"));
        }
        let channel = |range: std::ops::Range<usize>| -> PyResult<f32> {
            u8::from_str_radix(&digits[range], 16)
                .map(|value| f32::from(value) / 255.0)
                .map_err(|_| PyRuntimeError::new_err("Invalid hex color"))
        };
        let r = channel(0..2)?;
        let g = channel(2..4)?;
        let b = channel(4..6)?;
        Ok(PyColor {
            inner: RustColor::new(r, g, b),
        })
    }

    #[getter]
    fn r(&self) -> f32 {
        self.inner.r
    }

    #[getter]
    fn g(&self) -> f32 {
        self.inner.g
    }

    #[getter]
    fn b(&self) -> f32 {
        self.inner.b
    }
}
// Blend mode constants exposed to Python as upper-case static constructors.
#[allow(dead_code)]
#[pyclass(
    module = "pdf_oxide.pdf_oxide",
    name = "BlendMode",
    skip_from_py_object
)]
#[derive(Clone)]
pub struct PyBlendMode {
    inner: RustBlendMode,
}

#[pymethods]
impl PyBlendMode {
    #[staticmethod]
    #[allow(non_snake_case)]
    fn NORMAL() -> Self {
        let inner = RustBlendMode::Normal;
        Self { inner }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn MULTIPLY() -> Self {
        let inner = RustBlendMode::Multiply;
        Self { inner }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn SCREEN() -> Self {
        let inner = RustBlendMode::Screen;
        Self { inner }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn OVERLAY() -> Self {
        let inner = RustBlendMode::Overlay;
        Self { inner }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn DARKEN() -> Self {
        let inner = RustBlendMode::Darken;
        Self { inner }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn LIGHTEN() -> Self {
        let inner = RustBlendMode::Lighten;
        Self { inner }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn COLOR_DODGE() -> Self {
        let inner = RustBlendMode::ColorDodge;
        Self { inner }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn COLOR_BURN() -> Self {
        let inner = RustBlendMode::ColorBurn;
        Self { inner }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn HARD_LIGHT() -> Self {
        let inner = RustBlendMode::HardLight;
        Self { inner }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn SOFT_LIGHT() -> Self {
        let inner = RustBlendMode::SoftLight;
        Self { inner }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn DIFFERENCE() -> Self {
        let inner = RustBlendMode::Difference;
        Self { inner }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn EXCLUSION() -> Self {
        let inner = RustBlendMode::Exclusion;
        Self { inner }
    }
}
// Immutable-style builder for graphics-state settings: every setter returns
// a new value and leaves the receiver untouched.
#[allow(dead_code)]
#[pyclass(
    module = "pdf_oxide.pdf_oxide",
    name = "ExtGState",
    skip_from_py_object
)]
#[derive(Clone)]
pub struct PyExtGState {
    fill_alpha: Option<f32>,
    stroke_alpha: Option<f32>,
    blend_mode: Option<RustBlendMode>,
}

#[pymethods]
impl PyExtGState {
    // Empty state: nothing set.
    #[new]
    fn new() -> Self {
        Self {
            fill_alpha: None,
            stroke_alpha: None,
            blend_mode: None,
        }
    }

    // Set fill and stroke alpha together, clamped to 0.0..=1.0.
    fn alpha(&self, a: f32) -> Self {
        let clamped = Some(a.clamp(0.0, 1.0));
        Self {
            fill_alpha: clamped,
            stroke_alpha: clamped,
            ..self.clone()
        }
    }

    // Set only the fill alpha, clamped to 0.0..=1.0.
    fn fill_alpha(&self, a: f32) -> Self {
        let clamped = a.clamp(0.0, 1.0);
        Self {
            fill_alpha: Some(clamped),
            ..self.clone()
        }
    }

    // Set only the stroke alpha, clamped to 0.0..=1.0.
    fn stroke_alpha(&self, a: f32) -> Self {
        let clamped = a.clamp(0.0, 1.0);
        Self {
            stroke_alpha: Some(clamped),
            ..self.clone()
        }
    }

    // Set the blend mode, keeping both alpha values.
    fn blend_mode(&self, mode: &PyBlendMode) -> Self {
        let chosen = mode.inner;
        Self {
            blend_mode: Some(chosen),
            ..self.clone()
        }
    }

    // Preset with both alphas at 0.5 and no blend mode.
    #[staticmethod]
    fn semi_transparent() -> Self {
        let half = Some(0.5);
        Self {
            fill_alpha: half,
            stroke_alpha: half,
            blend_mode: None,
        }
    }
}
// Linear gradient description: two end points plus colour stops. Setters
// return a modified copy rather than mutating the receiver.
#[pyclass(
    module = "pdf_oxide.pdf_oxide",
    name = "LinearGradient",
    skip_from_py_object
)]
#[derive(Clone)]
pub struct PyLinearGradient {
    x1: f32,
    y1: f32,
    x2: f32,
    y2: f32,
    stops: Vec<(f32, RustColor)>,
}

#[pymethods]
impl PyLinearGradient {
    // Default gradient from (0, 0) to (100, 100) with no stops.
    #[new]
    fn new() -> Self {
        Self {
            x1: 0.0,
            y1: 0.0,
            x2: 100.0,
            y2: 100.0,
            stops: Vec::new(),
        }
    }

    // Copy with the start point replaced.
    fn start(&self, x: f32, y: f32) -> Self {
        Self {
            x1: x,
            y1: y,
            ..self.clone()
        }
    }

    // Copy with the end point replaced.
    fn end(&self, x: f32, y: f32) -> Self {
        Self {
            x2: x,
            y2: y,
            ..self.clone()
        }
    }

    // Copy with one more (offset, colour) stop appended.
    fn add_stop(&self, offset: f32, color: &PyColor) -> Self {
        let mut next = self.clone();
        let stop = (offset, color.inner);
        next.stops.push(stop);
        next
    }

    // Two-stop gradient running along x from 0 to `width`.
    #[staticmethod]
    fn horizontal(width: f32, start: &PyColor, end: &PyColor) -> Self {
        let stops = vec![(0.0, start.inner), (1.0, end.inner)];
        Self {
            x1: 0.0,
            y1: 0.0,
            x2: width,
            y2: 0.0,
            stops,
        }
    }

    // Two-stop gradient running along y from 0 to `height`.
    #[staticmethod]
    fn vertical(height: f32, start: &PyColor, end: &PyColor) -> Self {
        let stops = vec![(0.0, start.inner), (1.0, end.inner)];
        Self {
            x1: 0.0,
            y1: 0.0,
            x2: 0.0,
            y2: height,
            stops,
        }
    }
}
// Radial gradient description: an inner and an outer circle plus colour
// stops. Setters return a modified copy rather than mutating the receiver.
#[pyclass(
    module = "pdf_oxide.pdf_oxide",
    name = "RadialGradient",
    skip_from_py_object
)]
#[derive(Clone)]
pub struct PyRadialGradient {
    x1: f32,
    y1: f32,
    r1: f32,
    x2: f32,
    y2: f32,
    r2: f32,
    stops: Vec<(f32, RustColor)>,
}

#[pymethods]
impl PyRadialGradient {
    // Default: both circles centred at (50, 50), radii 0 and 50, no stops.
    #[new]
    fn new() -> Self {
        Self {
            x1: 50.0,
            y1: 50.0,
            r1: 0.0,
            x2: 50.0,
            y2: 50.0,
            r2: 50.0,
            stops: Vec::new(),
        }
    }

    // Copy with the inner circle replaced.
    fn inner_circle(&self, x: f32, y: f32, r: f32) -> Self {
        Self {
            x1: x,
            y1: y,
            r1: r,
            ..self.clone()
        }
    }

    // Copy with the outer circle replaced.
    fn outer_circle(&self, x: f32, y: f32, r: f32) -> Self {
        Self {
            x2: x,
            y2: y,
            r2: r,
            ..self.clone()
        }
    }

    // Copy with one more (offset, colour) stop appended.
    fn add_stop(&self, offset: f32, color: &PyColor) -> Self {
        let mut next = self.clone();
        let stop = (offset, color.inner);
        next.stops.push(stop);
        next
    }

    // Concentric circles at (x, y): inner radius 0, outer radius `radius`.
    #[staticmethod]
    fn centered(x: f32, y: f32, radius: f32) -> Self {
        let (cx, cy) = (x, y);
        Self {
            x1: cx,
            y1: cy,
            r1: 0.0,
            x2: cx,
            y2: cy,
            r2: radius,
            stops: Vec::new(),
        }
    }
}
// Line cap style; available under both lower-case and upper-case names.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "LineCap", skip_from_py_object)]
#[derive(Clone)]
pub struct PyLineCap {
    pub inner: RustLineCap,
}

#[pymethods]
impl PyLineCap {
    #[staticmethod]
    fn butt() -> Self {
        let inner = RustLineCap::Butt;
        Self { inner }
    }

    #[staticmethod]
    fn round() -> Self {
        let inner = RustLineCap::Round;
        Self { inner }
    }

    #[staticmethod]
    fn square() -> Self {
        let inner = RustLineCap::Square;
        Self { inner }
    }

    // Upper-case aliases producing the same values as the methods above.
    #[staticmethod]
    #[allow(non_snake_case)]
    fn BUTT() -> Self {
        Self {
            inner: RustLineCap::Butt,
        }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn ROUND() -> Self {
        Self {
            inner: RustLineCap::Round,
        }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn SQUARE() -> Self {
        Self {
            inner: RustLineCap::Square,
        }
    }
}
// Line join style; available under both lower-case and upper-case names.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "LineJoin", skip_from_py_object)]
#[derive(Clone)]
pub struct PyLineJoin {
    pub inner: RustLineJoin,
}

#[pymethods]
impl PyLineJoin {
    #[staticmethod]
    fn miter() -> Self {
        let inner = RustLineJoin::Miter;
        Self { inner }
    }

    #[staticmethod]
    fn round() -> Self {
        let inner = RustLineJoin::Round;
        Self { inner }
    }

    #[staticmethod]
    fn bevel() -> Self {
        let inner = RustLineJoin::Bevel;
        Self { inner }
    }

    // Upper-case aliases producing the same values as the methods above.
    #[staticmethod]
    #[allow(non_snake_case)]
    fn MITER() -> Self {
        Self {
            inner: RustLineJoin::Miter,
        }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn ROUND() -> Self {
        Self {
            inner: RustLineJoin::Round,
        }
    }

    #[staticmethod]
    #[allow(non_snake_case)]
    fn BEVEL() -> Self {
        Self {
            inner: RustLineJoin::Bevel,
        }
    }
}
// Namespace of static helpers that forward to `crate::writer::PatternPresets`
// and return the bytes it produces.
#[pyclass(
    module = "pdf_oxide.pdf_oxide",
    name = "PatternPresets",
    skip_from_py_object
)]
#[derive(Clone)]
pub struct PyPatternPresets;

#[pymethods]
impl PyPatternPresets {
    #[staticmethod]
    fn horizontal_stripes(width: f32, height: f32, stripe_height: f32, color: &PyColor) -> Vec<u8> {
        let ink = color.inner;
        crate::writer::PatternPresets::horizontal_stripes(width, height, stripe_height, ink)
    }

    #[staticmethod]
    fn vertical_stripes(width: f32, height: f32, stripe_width: f32, color: &PyColor) -> Vec<u8> {
        let ink = color.inner;
        crate::writer::PatternPresets::vertical_stripes(width, height, stripe_width, ink)
    }

    #[staticmethod]
    fn checkerboard(size: f32, color1: &PyColor, color2: &PyColor) -> Vec<u8> {
        let first = color1.inner;
        let second = color2.inner;
        crate::writer::PatternPresets::checkerboard(size, first, second)
    }

    #[staticmethod]
    fn dots(spacing: f32, radius: f32, color: &PyColor) -> Vec<u8> {
        let ink = color.inner;
        crate::writer::PatternPresets::dots(spacing, radius, ink)
    }

    #[staticmethod]
    fn diagonal_lines(size: f32, line_width: f32, color: &PyColor) -> Vec<u8> {
        let ink = color.inner;
        crate::writer::PatternPresets::diagonal_lines(size, line_width, ink)
    }

    #[staticmethod]
    fn crosshatch(size: f32, line_width: f32, color: &PyColor) -> Vec<u8> {
        let ink = color.inner;
        crate::writer::PatternPresets::crosshatch(size, line_width, ink)
    }
}
// Chainable style settings for artifacts; setters mutate in place and
// return the same Python object.
#[pyclass(
    module = "pdf_oxide.pdf_oxide",
    name = "ArtifactStyle",
    skip_from_py_object
)]
#[derive(Clone)]
pub struct PyArtifactStyle {
    pub inner: crate::writer::ArtifactStyle,
}

#[pymethods]
impl PyArtifactStyle {
    // Start from the default style.
    #[new]
    fn new() -> Self {
        let inner = crate::writer::ArtifactStyle::default();
        Self { inner }
    }

    // Apply the given font name and size, then return self for chaining.
    fn font<'a>(
        mut slf: PyRefMut<'a, Self>,
        name: &str,
        size: f32,
    ) -> PyResult<PyRefMut<'a, Self>> {
        let restyled = slf.inner.clone().font(name, size);
        slf.inner = restyled;
        Ok(slf)
    }

    // Apply `bold()` to the style, then return self for chaining.
    fn bold<'a>(mut slf: PyRefMut<'a, Self>) -> PyResult<PyRefMut<'a, Self>> {
        let restyled = slf.inner.clone().bold();
        slf.inner = restyled;
        Ok(slf)
    }
}
// Wrapper around `crate::writer::Artifact`.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "Artifact", from_py_object)]
#[derive(Clone)]
pub struct PyArtifact {
    pub inner: crate::writer::Artifact,
}

#[pymethods]
impl PyArtifact {
    // Artifact created via `Artifact::new()`.
    #[new]
    fn new() -> Self {
        let inner = crate::writer::Artifact::new();
        Self { inner }
    }

    // Artifact created via `Artifact::center(t)`.
    #[staticmethod]
    fn center(t: &str) -> Self {
        let inner = crate::writer::Artifact::center(t);
        Self { inner }
    }

    // Apply `with_left(t)` in place and return self for chaining.
    fn with_left<'a>(mut slf: PyRefMut<'a, Self>, t: &str) -> PyResult<PyRefMut<'a, Self>> {
        let updated = slf.inner.clone().with_left(t);
        slf.inner = updated;
        Ok(slf)
    }
}
// Header artifact; a thin wrapper holding a `PyArtifact`.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "Header", from_py_object)]
#[derive(Clone)]
pub struct PyHeader {
    pub inner: PyArtifact,
}

#[pymethods]
impl PyHeader {
    // Header wrapping `Artifact::new()`.
    #[new]
    fn new() -> Self {
        let artifact = PyArtifact {
            inner: crate::writer::Artifact::new(),
        };
        Self { inner: artifact }
    }

    // Header wrapping `Artifact::center(t)`.
    #[staticmethod]
    fn center(t: &str) -> Self {
        let artifact = PyArtifact {
            inner: crate::writer::Artifact::center(t),
        };
        Self { inner: artifact }
    }
}
// Footer artifact; a thin wrapper holding a `PyArtifact`.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "Footer", from_py_object)]
#[derive(Clone)]
pub struct PyFooter {
    pub inner: PyArtifact,
}

#[pymethods]
impl PyFooter {
    // Footer wrapping `Artifact::new()`.
    #[new]
    fn new() -> Self {
        let artifact = PyArtifact {
            inner: crate::writer::Artifact::new(),
        };
        Self { inner: artifact }
    }

    // Footer wrapping `Artifact::center(t)`.
    #[staticmethod]
    fn center(t: &str) -> Self {
        let artifact = PyArtifact {
            inner: crate::writer::Artifact::center(t),
        };
        Self { inner: artifact }
    }
}
/// Map a stamp name to a `StampType`.
///
/// The listed names (case-sensitive) map to their dedicated variants; any
/// other string becomes `StampType::Custom` carrying that string.
fn parse_stamp_type(name: &str) -> crate::writer::StampType {
    use crate::writer::StampType;

    let standard = match name {
        "Approved" => Some(StampType::Approved),
        "Experimental" => Some(StampType::Experimental),
        "NotApproved" => Some(StampType::NotApproved),
        "AsIs" => Some(StampType::AsIs),
        "Expired" => Some(StampType::Expired),
        "NotForPublicRelease" => Some(StampType::NotForPublicRelease),
        "Confidential" => Some(StampType::Confidential),
        "Final" => Some(StampType::Final),
        "Sold" => Some(StampType::Sold),
        "Departmental" => Some(StampType::Departmental),
        "ForComment" => Some(StampType::ForComment),
        "TopSecret" => Some(StampType::TopSecret),
        "Draft" => Some(StampType::Draft),
        "ForPublicRelease" => Some(StampType::ForPublicRelease),
        _ => None,
    };
    standard.unwrap_or_else(|| StampType::Custom(name.to_string()))
}
/// A page-building operation recorded for later use.
///
/// `PyFluentPageBuilder` stores these in its `ops` vector: each fluent method
/// pushes one variant carrying the arguments it was called with.
/// NOTE(review): the code that consumes the queue is not visible here, so the
/// exact effect of each variant should be confirmed against it.
enum PendingPageOp {
/// Font name and size, as passed to `font(name, size)`.
Font(String, f32),
/// Coordinates, as passed to `at(x, y)`.
At(f32, f32),
Text(String),
/// Heading level followed by the heading text.
Heading(u8, String),
Paragraph(String),
/// Amount passed to `space(points)`.
Space(f32),
HorizontalRule,
LinkUrl(String),
LinkPage(usize),
LinkNamed(String),
/// The markup variants below carry the three components of the colour tuple.
Highlight(f32, f32, f32),
Underline(f32, f32, f32),
Strikeout(f32, f32, f32),
Squiggly(f32, f32, f32),
StickyNote(String),
/// Coordinates followed by the note text.
StickyNoteAt(f32, f32, String),
Watermark(String),
WatermarkConfidential,
WatermarkDraft,
/// Stamp name; presumably resolved via `parse_stamp_type` (TODO confirm).
Stamp(String),
FreeText {
x: f32,
y: f32,
w: f32,
h: f32,
text: String,
},
// Form-field variants: a field name plus a rectangle (x, y, w, h) and
// field-specific data.
TextField {
name: String,
x: f32,
y: f32,
w: f32,
h: f32,
default_value: Option<String>,
},
Checkbox {
name: String,
x: f32,
y: f32,
w: f32,
h: f32,
checked: bool,
},
ComboBox {
name: String,
x: f32,
y: f32,
w: f32,
h: f32,
options: Vec<String>,
selected: Option<String>,
},
RadioGroup {
name: String,
// One tuple per button: a string plus four floats; presumably
// (value, x, y, w, h) — TODO confirm against the consumer.
buttons: Vec<(String, f32, f32, f32, f32)>,
selected: Option<String>,
},
PushButton {
name: String,
x: f32,
y: f32,
w: f32,
h: f32,
caption: String,
},
// Drawing primitives; the meaning of each float is positional and not
// visible here (FilledRect has three more floats than Rect, presumably a
// colour — TODO confirm).
Rect(f32, f32, f32, f32),
FilledRect(f32, f32, f32, f32, f32, f32, f32),
Line(f32, f32, f32, f32),
}
// Font loaded for embedding. `inner` becomes `None` once the font has been
// handed over to a builder.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "EmbeddedFont")]
pub struct PyEmbeddedFont {
    pub(crate) inner: Option<crate::writer::EmbeddedFont>,
}

#[pymethods]
impl PyEmbeddedFont {
    // Load a font from a file path; failures are raised as `IOError`.
    #[staticmethod]
    fn from_file(path: &str) -> PyResult<Self> {
        let loaded = crate::writer::EmbeddedFont::from_file(path)
            .map_err(|e| PyIOError::new_err(format!("failed to load font: {e}")))?;
        Ok(Self {
            inner: Some(loaded),
        })
    }

    // Parse a font from in-memory bytes with an optional name; failures are
    // raised as `ValueError`.
    #[staticmethod]
    #[pyo3(signature = (data, name=None))]
    fn from_bytes(data: &Bound<'_, PyBytes>, name: Option<String>) -> PyResult<Self> {
        let owned = data.as_bytes().to_vec();
        let parsed = crate::writer::EmbeddedFont::from_data(name, owned)
            .map_err(|e| PyValueError::new_err(format!("failed to parse font: {e}")))?;
        Ok(Self {
            inner: Some(parsed),
        })
    }

    // Font name, or an empty string once the font has been consumed.
    #[getter]
    fn name(&self) -> &str {
        match &self.inner {
            Some(font) => font.name.as_str(),
            None => "",
        }
    }

    fn __repr__(&self) -> String {
        if let Some(font) = self.inner.as_ref() {
            format!("EmbeddedFont('{}')", font.name)
        } else {
            "EmbeddedFont(<consumed>)".to_string()
        }
    }
}
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "DocumentBuilder")]
pub struct PyDocumentBuilder {
pub(crate) inner: Option<crate::writer::DocumentBuilder>,
}
impl PyDocumentBuilder {
fn take_inner(&mut self, ctx: &str) -> PyResult<crate::writer::DocumentBuilder> {
self.inner.take().ok_or_else(|| {
PyRuntimeError::new_err(format!("DocumentBuilder already consumed ({ctx})"))
})
}
fn with_inner<F>(&mut self, ctx: &str, f: F) -> PyResult<()>
where
F: FnOnce(crate::writer::DocumentBuilder) -> crate::writer::DocumentBuilder,
{
let taken = self.take_inner(ctx)?;
self.inner = Some(f(taken));
Ok(())
}
}
#[pymethods]
impl PyDocumentBuilder {
#[new]
fn new() -> Self {
Self {
inner: Some(crate::writer::DocumentBuilder::new()),
}
}
fn title<'a>(mut slf: PyRefMut<'a, Self>, title: String) -> PyResult<PyRefMut<'a, Self>> {
slf.with_inner("title", |b| b.title(title))?;
Ok(slf)
}
fn author<'a>(mut slf: PyRefMut<'a, Self>, author: String) -> PyResult<PyRefMut<'a, Self>> {
slf.with_inner("author", |b| b.author(author))?;
Ok(slf)
}
fn subject<'a>(mut slf: PyRefMut<'a, Self>, subject: String) -> PyResult<PyRefMut<'a, Self>> {
slf.with_inner("subject", |b| b.subject(subject))?;
Ok(slf)
}
fn keywords<'a>(mut slf: PyRefMut<'a, Self>, keywords: String) -> PyResult<PyRefMut<'a, Self>> {
slf.with_inner("keywords", |b| b.keywords(keywords))?;
Ok(slf)
}
fn creator<'a>(mut slf: PyRefMut<'a, Self>, creator: String) -> PyResult<PyRefMut<'a, Self>> {
slf.with_inner("creator", |b| b.creator(creator))?;
Ok(slf)
}
fn register_embedded_font<'a>(
mut slf: PyRefMut<'a, Self>,
name: String,
font: &Bound<'_, PyEmbeddedFont>,
) -> PyResult<PyRefMut<'a, Self>> {
let embedded = font
.borrow_mut()
.inner
.take()
.ok_or_else(|| PyRuntimeError::new_err("EmbeddedFont already consumed"))?;
slf.with_inner("register_embedded_font", |b| b.register_embedded_font(name, embedded))?;
Ok(slf)
}
fn a4_page(slf_handle: Py<Self>) -> PyFluentPageBuilder {
PyFluentPageBuilder {
parent: slf_handle,
page_size: Some(crate::writer::PageSize::A4),
custom_width: 0.0,
custom_height: 0.0,
ops: Vec::new(),
done_called: false,
}
}
fn letter_page(slf_handle: Py<Self>) -> PyFluentPageBuilder {
PyFluentPageBuilder {
parent: slf_handle,
page_size: Some(crate::writer::PageSize::Letter),
custom_width: 0.0,
custom_height: 0.0,
ops: Vec::new(),
done_called: false,
}
}
fn page(slf_handle: Py<Self>, width: f32, height: f32) -> PyFluentPageBuilder {
PyFluentPageBuilder {
parent: slf_handle,
page_size: None,
custom_width: width,
custom_height: height,
ops: Vec::new(),
done_called: false,
}
}
fn build<'py>(&mut self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
let inner = self.take_inner("build")?;
let bytes = inner
.build()
.map_err(|e| PyRuntimeError::new_err(format!("build failed: {e}")))?;
Ok(PyBytes::new(py, &bytes))
}
fn save(&mut self, path: &str) -> PyResult<()> {
let inner = self.take_inner("save")?;
inner
.save(path)
.map_err(|e| PyIOError::new_err(format!("save failed: {e}")))
}
fn save_encrypted(
&mut self,
path: &str,
user_password: &str,
owner_password: &str,
) -> PyResult<()> {
let inner = self.take_inner("save_encrypted")?;
inner
.save_encrypted(path, user_password, owner_password)
.map_err(|e| PyIOError::new_err(format!("save_encrypted failed: {e}")))
}
fn to_bytes_encrypted<'py>(
&mut self,
py: Python<'py>,
user_password: &str,
owner_password: &str,
) -> PyResult<Bound<'py, PyBytes>> {
let inner = self.take_inner("to_bytes_encrypted")?;
let bytes = inner
.to_bytes_encrypted(user_password, owner_password)
.map_err(|e| PyRuntimeError::new_err(format!("to_bytes_encrypted failed: {e}")))?;
Ok(PyBytes::new(py, &bytes))
}
}
// Fluent page builder: records operations in `ops` and keeps a handle to
// the parent `DocumentBuilder`. `page_size` is `None` for custom-sized
// pages, in which case `custom_width` / `custom_height` carry the size.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "FluentPageBuilder")]
pub struct PyFluentPageBuilder {
    parent: Py<PyDocumentBuilder>,
    page_size: Option<crate::writer::PageSize>,
    custom_width: f32,
    custom_height: f32,
    ops: Vec<PendingPageOp>,
    done_called: bool,
}

impl PyFluentPageBuilder {
    /// Queue an operation, refusing once `done_called` has been set.
    fn push(&mut self, op: PendingPageOp) -> PyResult<()> {
        if !self.done_called {
            self.ops.push(op);
            return Ok(());
        }
        Err(PyRuntimeError::new_err("FluentPageBuilder.done() already called"))
    }
}
#[pymethods]
impl PyFluentPageBuilder {
// Queue a `Font` operation and return self for chaining.
fn font<'a>(
    mut slf: PyRefMut<'a, Self>,
    name: String,
    size: f32,
) -> PyResult<PyRefMut<'a, Self>> {
    let op = PendingPageOp::Font(name, size);
    slf.push(op).map(|()| slf)
}
// Queue an `At` operation with the given coordinates and return self.
fn at<'a>(mut slf: PyRefMut<'a, Self>, x: f32, y: f32) -> PyResult<PyRefMut<'a, Self>> {
    let op = PendingPageOp::At(x, y);
    slf.push(op).map(|()| slf)
}
// Queue a `Text` operation and return self for chaining.
fn text<'a>(mut slf: PyRefMut<'a, Self>, text: String) -> PyResult<PyRefMut<'a, Self>> {
    let op = PendingPageOp::Text(text);
    slf.push(op).map(|()| slf)
}
// Queue a `Heading` operation (level, text) and return self for chaining.
fn heading<'a>(
    mut slf: PyRefMut<'a, Self>,
    level: u8,
    text: String,
) -> PyResult<PyRefMut<'a, Self>> {
    let op = PendingPageOp::Heading(level, text);
    slf.push(op).map(|()| slf)
}
fn paragraph<'a>(mut slf: PyRefMut<'a, Self>, text: String) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::Paragraph(text))?;
Ok(slf)
}
fn space<'a>(mut slf: PyRefMut<'a, Self>, points: f32) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::Space(points))?;
Ok(slf)
}
fn horizontal_rule<'a>(mut slf: PyRefMut<'a, Self>) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::HorizontalRule)?;
Ok(slf)
}
fn link_url<'a>(mut slf: PyRefMut<'a, Self>, url: String) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::LinkUrl(url))?;
Ok(slf)
}
fn link_page<'a>(mut slf: PyRefMut<'a, Self>, page: usize) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::LinkPage(page))?;
Ok(slf)
}
fn link_named<'a>(
mut slf: PyRefMut<'a, Self>,
destination: String,
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::LinkNamed(destination))?;
Ok(slf)
}
fn highlight<'a>(
mut slf: PyRefMut<'a, Self>,
color: (f32, f32, f32),
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::Highlight(color.0, color.1, color.2))?;
Ok(slf)
}
fn underline<'a>(
mut slf: PyRefMut<'a, Self>,
color: (f32, f32, f32),
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::Underline(color.0, color.1, color.2))?;
Ok(slf)
}
fn strikeout<'a>(
mut slf: PyRefMut<'a, Self>,
color: (f32, f32, f32),
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::Strikeout(color.0, color.1, color.2))?;
Ok(slf)
}
fn squiggly<'a>(
mut slf: PyRefMut<'a, Self>,
color: (f32, f32, f32),
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::Squiggly(color.0, color.1, color.2))?;
Ok(slf)
}
fn sticky_note<'a>(mut slf: PyRefMut<'a, Self>, text: String) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::StickyNote(text))?;
Ok(slf)
}
fn sticky_note_at<'a>(
mut slf: PyRefMut<'a, Self>,
x: f32,
y: f32,
text: String,
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::StickyNoteAt(x, y, text))?;
Ok(slf)
}
fn watermark<'a>(mut slf: PyRefMut<'a, Self>, text: String) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::Watermark(text))?;
Ok(slf)
}
fn watermark_confidential<'a>(mut slf: PyRefMut<'a, Self>) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::WatermarkConfidential)?;
Ok(slf)
}
fn watermark_draft<'a>(mut slf: PyRefMut<'a, Self>) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::WatermarkDraft)?;
Ok(slf)
}
fn stamp<'a>(mut slf: PyRefMut<'a, Self>, name: String) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::Stamp(name))?;
Ok(slf)
}
fn freetext<'a>(
mut slf: PyRefMut<'a, Self>,
x: f32,
y: f32,
w: f32,
h: f32,
text: String,
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::FreeText { x, y, w, h, text })?;
Ok(slf)
}
#[pyo3(signature = (name, x, y, w, h, default_value=None))]
fn text_field<'a>(
mut slf: PyRefMut<'a, Self>,
name: String,
x: f32,
y: f32,
w: f32,
h: f32,
default_value: Option<String>,
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::TextField {
name,
x,
y,
w,
h,
default_value,
})?;
Ok(slf)
}
fn checkbox<'a>(
mut slf: PyRefMut<'a, Self>,
name: String,
x: f32,
y: f32,
w: f32,
h: f32,
checked: bool,
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::Checkbox {
name,
x,
y,
w,
h,
checked,
})?;
Ok(slf)
}
#[pyo3(signature = (name, x, y, w, h, options, selected=None))]
fn combo_box<'a>(
mut slf: PyRefMut<'a, Self>,
name: String,
x: f32,
y: f32,
w: f32,
h: f32,
options: Vec<String>,
selected: Option<String>,
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::ComboBox {
name,
x,
y,
w,
h,
options,
selected,
})?;
Ok(slf)
}
#[pyo3(signature = (name, buttons, selected=None))]
fn radio_group<'a>(
mut slf: PyRefMut<'a, Self>,
name: String,
buttons: Vec<(String, f32, f32, f32, f32)>,
selected: Option<String>,
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::RadioGroup {
name,
buttons,
selected,
})?;
Ok(slf)
}
fn push_button<'a>(
mut slf: PyRefMut<'a, Self>,
name: String,
x: f32,
y: f32,
w: f32,
h: f32,
caption: String,
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::PushButton {
name,
x,
y,
w,
h,
caption,
})?;
Ok(slf)
}
fn rect<'a>(
mut slf: PyRefMut<'a, Self>,
x: f32,
y: f32,
w: f32,
h: f32,
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::Rect(x, y, w, h))?;
Ok(slf)
}
fn filled_rect<'a>(
mut slf: PyRefMut<'a, Self>,
x: f32,
y: f32,
w: f32,
h: f32,
r: f32,
g: f32,
b: f32,
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::FilledRect(x, y, w, h, r, g, b))?;
Ok(slf)
}
fn line<'a>(
mut slf: PyRefMut<'a, Self>,
x1: f32,
y1: f32,
x2: f32,
y2: f32,
) -> PyResult<PyRefMut<'a, Self>> {
slf.push(PendingPageOp::Line(x1, y1, x2, y2))?;
Ok(slf)
}
fn done(&mut self, py: Python) -> PyResult<Py<PyDocumentBuilder>> {
if self.done_called {
return Err(PyRuntimeError::new_err("FluentPageBuilder.done() already called"));
}
self.done_called = true;
let parent_handle = self.parent.clone_ref(py);
let mut parent_ref = parent_handle.borrow_mut(py);
let inner = parent_ref
.inner
.as_mut()
.ok_or_else(|| PyRuntimeError::new_err("DocumentBuilder already consumed"))?;
let page_size = self
.page_size
.unwrap_or(crate::writer::PageSize::Custom(self.custom_width, self.custom_height));
let mut page = inner.page(page_size);
for op in self.ops.drain(..) {
page = match op {
PendingPageOp::Font(name, size) => page.font(&name, size),
PendingPageOp::At(x, y) => page.at(x, y),
PendingPageOp::Text(text) => page.text(&text),
PendingPageOp::Heading(level, text) => page.heading(level, &text),
PendingPageOp::Paragraph(text) => page.paragraph(&text),
PendingPageOp::Space(points) => page.space(points),
PendingPageOp::HorizontalRule => page.horizontal_rule(),
PendingPageOp::LinkUrl(url) => page.link_url(&url),
PendingPageOp::LinkPage(p) => page.link_page(p),
PendingPageOp::LinkNamed(dest) => page.link_named(&dest),
PendingPageOp::Highlight(r, g, b) => page.highlight((r, g, b)),
PendingPageOp::Underline(r, g, b) => page.underline((r, g, b)),
PendingPageOp::Strikeout(r, g, b) => page.strikeout((r, g, b)),
PendingPageOp::Squiggly(r, g, b) => page.squiggly((r, g, b)),
PendingPageOp::StickyNote(text) => page.sticky_note(&text),
PendingPageOp::StickyNoteAt(x, y, text) => page.sticky_note_at(x, y, &text),
PendingPageOp::Watermark(text) => page.watermark(&text),
PendingPageOp::WatermarkConfidential => page.watermark_confidential(),
PendingPageOp::WatermarkDraft => page.watermark_draft(),
PendingPageOp::Stamp(name) => page.stamp(parse_stamp_type(&name)),
PendingPageOp::FreeText { x, y, w, h, text } => {
page.freetext(crate::geometry::Rect::new(x, y, w, h), &text)
},
PendingPageOp::TextField {
name,
x,
y,
w,
h,
default_value,
} => page.text_field(name, x, y, w, h, default_value),
PendingPageOp::Checkbox {
name,
x,
y,
w,
h,
checked,
} => page.checkbox(name, x, y, w, h, checked),
PendingPageOp::ComboBox {
name,
x,
y,
w,
h,
options,
selected,
} => page.combo_box(name, x, y, w, h, options, selected),
PendingPageOp::RadioGroup {
name,
buttons,
selected,
} => page.radio_group(name, buttons, selected),
PendingPageOp::PushButton {
name,
x,
y,
w,
h,
caption,
} => page.push_button(name, x, y, w, h, caption),
PendingPageOp::Rect(x, y, w, h) => page.rect(x, y, w, h),
PendingPageOp::FilledRect(x, y, w, h, r, g, b) => {
page.filled_rect(x, y, w, h, r, g, b)
},
PendingPageOp::Line(x1, y1, x2, y2) => page.line(x1, y1, x2, y2),
};
}
page.done();
drop(parent_ref);
Ok(parent_handle)
}
}
/// Python wrapper around `crate::writer::PageTemplate`.
#[pyclass(
    module = "pdf_oxide.pdf_oxide",
    name = "PageTemplate",
    skip_from_py_object
)]
#[derive(Clone)]
pub struct PyPageTemplate {
    pub inner: crate::writer::PageTemplate,
}

#[pymethods]
impl PyPageTemplate {
    /// Create a template wrapping `PageTemplate::new()`.
    #[new]
    fn new() -> Self {
        let inner = crate::writer::PageTemplate::new();
        Self { inner }
    }

    /// Set the header from either a `PyHeader` or a `PyArtifact` object and
    /// return the same template for chaining.
    ///
    /// `PyHeader` extraction is tried first; if it fails, the argument must
    /// extract as `PyArtifact`, otherwise that extraction error is raised.
    fn header<'a>(
        mut slf: PyRefMut<'a, Self>,
        h: &Bound<'_, PyAny>,
    ) -> PyResult<PyRefMut<'a, Self>> {
        let artifact = match h.extract::<PyHeader>() {
            Ok(ph) => ph.inner.inner.clone(),
            Err(_) => h.extract::<PyArtifact>()?.inner.clone(),
        };
        // `header` is called on a clone of the current template and the
        // result replaces the stored one.
        let updated = slf.inner.clone().header(artifact);
        slf.inner = updated;
        Ok(slf)
    }

    /// Set the footer from either a `PyFooter` or a `PyArtifact` object and
    /// return the same template for chaining.
    ///
    /// `PyFooter` extraction is tried first; if it fails, the argument must
    /// extract as `PyArtifact`, otherwise that extraction error is raised.
    fn footer<'a>(
        mut slf: PyRefMut<'a, Self>,
        f: &Bound<'_, PyAny>,
    ) -> PyResult<PyRefMut<'a, Self>> {
        let artifact = match f.extract::<PyFooter>() {
            Ok(pf) => pf.inner.inner.clone(),
            Err(_) => f.extract::<PyArtifact>()?.inner.clone(),
        };
        let updated = slf.inner.clone().footer(artifact);
        slf.inner = updated;
        Ok(slf)
    }
}
/// Process-wide slot for the `pyo3_log` reset handle, filled at most once.
static PYO3_LOG_RESET_HANDLE: std::sync::OnceLock<pyo3_log::ResetHandle> =
    std::sync::OnceLock::new();

/// Initialise `pyo3_log` on first call and remember its reset handle.
///
/// When `pyo3_log::try_init()` fails, the handle of a freshly constructed
/// default `Logger` is stored instead.
/// NOTE(review): that fallback handle belongs to a logger that was never
/// installed, so resetting it presumably has no visible effect — confirm.
fn init_pyo3_log_handle() {
    let _ = PYO3_LOG_RESET_HANDLE.get_or_init(|| match pyo3_log::try_init() {
        Ok(handle) => handle,
        Err(_) => pyo3_log::Logger::default().reset_handle(),
    });
}
/// Call `reset()` on the stored `pyo3_log` handle, if one has been stored.
/// Does nothing when `PYO3_LOG_RESET_HANDLE` is still empty.
fn reset_pyo3_log_cache() {
    let Some(handle) = PYO3_LOG_RESET_HANDLE.get() else {
        return;
    };
    handle.reset();
}
/// Ensure the `pyo3_log` handle is initialised, then reset it.
///
/// The initialisation step is a no-op after the first call; the reset runs
/// on every call.
#[pyfunction]
fn setup_logging() {
// Must run first so that a handle exists for the reset below.
init_pyo3_log_handle();
reset_pyo3_log_cache();
}
#[pyfunction]
fn set_log_level(level: &str) -> PyResult<()> {
use log::LevelFilter;
let filter = match level.to_ascii_lowercase().as_str() {
"off" | "none" | "disabled" => LevelFilter::Off,
"error" => LevelFilter::Error,
"warn" | "warning" => LevelFilter::Warn,
"info" => LevelFilter::Info,
"debug" => LevelFilter::Debug,
"trace" => LevelFilter::Trace,
other => {
return Err(pyo3::exceptions::PyValueError::new_err(format!(
"invalid log level '{}': expected off, error, warn, info, debug, or trace",
other
)));
},
};
log::set_max_level(filter);
reset_pyo3_log_cache();
Ok(())
}
#[pyfunction]
fn disable_logging() {
log::set_max_level(log::LevelFilter::Off);
reset_pyo3_log_cache();
}
#[pyfunction]
fn get_log_level() -> &'static str {
match log::max_level() {
log::LevelFilter::Off => "off",
log::LevelFilter::Error => "error",
log::LevelFilter::Warn => "warn",
log::LevelFilter::Info => "info",
log::LevelFilter::Debug => "debug",
log::LevelFilter::Trace => "trace",
}
}
/// Immutable (`frozen`) bundle of layout parameters exposed to Python as
/// `LayoutParams`. Each field is readable through a getter of the same name.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "LayoutParams", frozen)]
pub struct PyLayoutParams {
    pub word_gap_threshold: f32,
    pub line_gap_threshold: f32,
    pub median_char_width: f32,
    pub median_font_size: f32,
    pub median_line_spacing: f32,
    pub column_count: usize,
}

#[pymethods]
impl PyLayoutParams {
    /// Value of the `word_gap_threshold` field.
    #[getter]
    fn word_gap_threshold(&self) -> f32 {
        self.word_gap_threshold
    }

    /// Value of the `line_gap_threshold` field.
    #[getter]
    fn line_gap_threshold(&self) -> f32 {
        self.line_gap_threshold
    }

    /// Value of the `median_char_width` field.
    #[getter]
    fn median_char_width(&self) -> f32 {
        self.median_char_width
    }

    /// Value of the `median_font_size` field.
    #[getter]
    fn median_font_size(&self) -> f32 {
        self.median_font_size
    }

    /// Value of the `median_line_spacing` field.
    #[getter]
    fn median_line_spacing(&self) -> f32 {
        self.median_line_spacing
    }

    /// Value of the `column_count` field.
    #[getter]
    fn column_count(&self) -> usize {
        self.column_count
    }

    /// Debug representation; float fields are printed with two decimals.
    fn __repr__(&self) -> String {
        let Self {
            word_gap_threshold: word_gap,
            line_gap_threshold: line_gap,
            median_char_width: char_width,
            median_font_size: font_size,
            median_line_spacing: line_spacing,
            column_count: columns,
        } = self;
        format!(
            "LayoutParams(word_gap={:.2}, line_gap={:.2}, char_width={:.2}, font_size={:.2}, line_spacing={:.2}, columns={})",
            word_gap, line_gap, char_width, font_size, line_spacing, columns,
        )
    }
}
/// Immutable (`frozen`) Python wrapper around
/// `crate::config::ExtractionProfile`.
///
/// Instances are obtained from the static constructors below, each of which
/// wraps the matching associated constant of `ExtractionProfile`.
#[pyclass(
    module = "pdf_oxide.pdf_oxide",
    name = "ExtractionProfile",
    frozen,
    from_py_object
)]
#[derive(Clone)]
pub struct PyExtractionProfile {
    inner: crate::config::ExtractionProfile,
}

#[pymethods]
impl PyExtractionProfile {
    /// The profile's `name` field.
    #[getter]
    fn name(&self) -> &'static str {
        let profile = &self.inner;
        profile.name
    }

    /// The profile's `tj_offset_threshold` field.
    #[getter]
    fn tj_offset_threshold(&self) -> f32 {
        let profile = &self.inner;
        profile.tj_offset_threshold
    }

    /// The profile's `word_margin_ratio` field.
    #[getter]
    fn word_margin_ratio(&self) -> f32 {
        let profile = &self.inner;
        profile.word_margin_ratio
    }

    /// The profile's `space_threshold_em_ratio` field.
    #[getter]
    fn space_threshold_em_ratio(&self) -> f32 {
        let profile = &self.inner;
        profile.space_threshold_em_ratio
    }

    /// The profile's `space_char_multiplier` field.
    #[getter]
    fn space_char_multiplier(&self) -> f32 {
        let profile = &self.inner;
        profile.space_char_multiplier
    }

    /// The profile's `use_adaptive_threshold` field.
    #[getter]
    fn use_adaptive_threshold(&self) -> bool {
        let profile = &self.inner;
        profile.use_adaptive_threshold
    }

    /// Wrap `ExtractionProfile::CONSERVATIVE`.
    #[staticmethod]
    fn conservative() -> Self {
        let inner = crate::config::ExtractionProfile::CONSERVATIVE;
        Self { inner }
    }

    /// Wrap `ExtractionProfile::AGGRESSIVE`.
    #[staticmethod]
    fn aggressive() -> Self {
        let inner = crate::config::ExtractionProfile::AGGRESSIVE;
        Self { inner }
    }

    /// Wrap `ExtractionProfile::BALANCED`.
    #[staticmethod]
    fn balanced() -> Self {
        let inner = crate::config::ExtractionProfile::BALANCED;
        Self { inner }
    }

    /// Wrap `ExtractionProfile::ACADEMIC`.
    #[staticmethod]
    fn academic() -> Self {
        let inner = crate::config::ExtractionProfile::ACADEMIC;
        Self { inner }
    }

    /// Wrap `ExtractionProfile::POLICY`.
    #[staticmethod]
    fn policy() -> Self {
        let inner = crate::config::ExtractionProfile::POLICY;
        Self { inner }
    }

    /// Wrap `ExtractionProfile::FORM`.
    #[staticmethod]
    fn form() -> Self {
        let inner = crate::config::ExtractionProfile::FORM;
        Self { inner }
    }

    /// Wrap `ExtractionProfile::GOVERNMENT`.
    #[staticmethod]
    fn government() -> Self {
        let inner = crate::config::ExtractionProfile::GOVERNMENT;
        Self { inner }
    }

    /// Wrap `ExtractionProfile::SCANNED_OCR`.
    #[staticmethod]
    fn scanned_ocr() -> Self {
        let inner = crate::config::ExtractionProfile::SCANNED_OCR;
        Self { inner }
    }

    /// Wrap `ExtractionProfile::ADAPTIVE`.
    #[staticmethod]
    fn adaptive() -> Self {
        let inner = crate::config::ExtractionProfile::ADAPTIVE;
        Self { inner }
    }

    /// Return the entries of `ExtractionProfile::all_profiles()` as a list
    /// of strings.
    #[staticmethod]
    fn available() -> Vec<&'static str> {
        let names = crate::config::ExtractionProfile::all_profiles();
        names.to_vec()
    }

    /// Debug representation showing the name and two of the thresholds.
    fn __repr__(&self) -> String {
        let profile = &self.inner;
        format!(
            "ExtractionProfile('{}', word_margin_ratio={}, tj_offset_threshold={})",
            profile.name, profile.word_margin_ratio, profile.tj_offset_threshold,
        )
    }
}
/// Python wrapper around `crate::signatures::TsaClient`.
///
/// The wrapped client only exists when the crate is built with the
/// `tsa-client` feature; without it every method raises
/// `NotImplementedError`.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "TsaClient")]
pub struct PyTsaClient {
    #[cfg(feature = "tsa-client")]
    inner: crate::signatures::TsaClient,
}

#[cfg(feature = "tsa-client")]
impl PyTsaClient {
    /// Translate the integer code used by the Python API into a
    /// `HashAlgorithm`: 1 = SHA-1, 2 = SHA-256, 3 = SHA-384, 4 = SHA-512.
    /// Any other code falls back to SHA-256 rather than raising.
    fn hash_algorithm_from_code(code: i32) -> crate::signatures::HashAlgorithm {
        use crate::signatures::HashAlgorithm;
        match code {
            1 => HashAlgorithm::Sha1,
            2 => HashAlgorithm::Sha256,
            3 => HashAlgorithm::Sha384,
            4 => HashAlgorithm::Sha512,
            _ => HashAlgorithm::Sha256,
        }
    }
}

#[pymethods]
impl PyTsaClient {
    /// Build a client from the given configuration.
    ///
    /// A `timeout_seconds` of zero or less is replaced by 30 seconds, and an
    /// unrecognised `hash_algorithm` code is treated as SHA-256. Raises
    /// `NotImplementedError` when built without the `tsa-client` feature.
    #[new]
    #[pyo3(signature = (
        url,
        username=None,
        password=None,
        timeout_seconds=30,
        hash_algorithm=2,
        use_nonce=true,
        cert_req=true,
    ))]
    fn new(
        url: String,
        username: Option<String>,
        password: Option<String>,
        timeout_seconds: i32,
        hash_algorithm: i32,
        use_nonce: bool,
        cert_req: bool,
    ) -> PyResult<Self> {
        #[cfg(feature = "tsa-client")]
        {
            // Non-positive timeouts fall back to the 30 second default.
            let timeout_secs: u64 = if timeout_seconds > 0 {
                timeout_seconds as u64
            } else {
                30
            };
            let cfg = crate::signatures::TsaClientConfig {
                url,
                username,
                password,
                timeout: std::time::Duration::from_secs(timeout_secs),
                hash_algorithm: Self::hash_algorithm_from_code(hash_algorithm),
                use_nonce,
                cert_req,
            };
            let inner = crate::signatures::TsaClient::new(cfg);
            Ok(Self { inner })
        }
        #[cfg(not(feature = "tsa-client"))]
        {
            // Consume the arguments so the feature-less build has no unused
            // parameter warnings.
            let _ = (url, username, password, timeout_seconds, hash_algorithm, use_nonce, cert_req);
            Err(PyNotImplementedError::new_err(
                "pdf_oxide was built without the `tsa-client` feature",
            ))
        }
    }

    /// Pass `data` to the wrapped client's `request_timestamp` and wrap the
    /// result in a `Timestamp`.
    ///
    /// Raises `RuntimeError` (`TSA error: ...`) when the client reports an
    /// error, or `NotImplementedError` without the `tsa-client` feature.
    fn request_timestamp(&self, data: &Bound<'_, PyBytes>) -> PyResult<PyTimestamp> {
        #[cfg(feature = "tsa-client")]
        {
            match self.inner.request_timestamp(data.as_bytes()) {
                Ok(ts) => Ok(PyTimestamp { inner: ts }),
                Err(e) => Err(PyRuntimeError::new_err(format!("TSA error: {e}"))),
            }
        }
        #[cfg(not(feature = "tsa-client"))]
        {
            let _ = data;
            Err(PyNotImplementedError::new_err(
                "pdf_oxide was built without the `tsa-client` feature",
            ))
        }
    }

    /// Pass `hash` and the algorithm selected by `hash_algorithm` to the
    /// wrapped client's `request_timestamp_hash`.
    ///
    /// `hash_algorithm` uses the same integer codes as the constructor;
    /// unrecognised codes are treated as SHA-256. Raises `RuntimeError`
    /// (`TSA error: ...`) when the client reports an error, or
    /// `NotImplementedError` without the `tsa-client` feature.
    fn request_timestamp_hash(
        &self,
        hash: &Bound<'_, PyBytes>,
        hash_algorithm: i32,
    ) -> PyResult<PyTimestamp> {
        #[cfg(feature = "tsa-client")]
        {
            let algo = Self::hash_algorithm_from_code(hash_algorithm);
            match self.inner.request_timestamp_hash(hash.as_bytes(), algo) {
                Ok(ts) => Ok(PyTimestamp { inner: ts }),
                Err(e) => Err(PyRuntimeError::new_err(format!("TSA error: {e}"))),
            }
        }
        #[cfg(not(feature = "tsa-client"))]
        {
            let _ = (hash, hash_algorithm);
            Err(PyNotImplementedError::new_err(
                "pdf_oxide was built without the `tsa-client` feature",
            ))
        }
    }
}
/// Python wrapper around `crate::signatures::SigningCredentials`.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "Certificate")]
pub struct PyCertificate {
    creds: crate::signatures::SigningCredentials,
}

#[pymethods]
impl PyCertificate {
    /// Build a certificate by passing a copy of `data` to
    /// `SigningCredentials::from_der`.
    ///
    /// Raises `ValueError` when `data` is empty or `from_der` rejects it,
    /// and `NotImplementedError` when built without the `signatures`
    /// feature.
    #[staticmethod]
    fn load(data: &Bound<'_, PyBytes>) -> PyResult<Self> {
        #[cfg(feature = "signatures")]
        {
            let der = data.as_bytes();
            if der.is_empty() {
                return Err(PyValueError::new_err("Certificate data must not be empty"));
            }
            match crate::signatures::SigningCredentials::from_der(der.to_vec()) {
                Ok(creds) => Ok(Self { creds }),
                Err(e) => Err(PyValueError::new_err(format!("Invalid certificate: {e}"))),
            }
        }
        #[cfg(not(feature = "signatures"))]
        {
            let _ = data;
            Err(PyNotImplementedError::new_err(
                "Certificate.load(): pdf_oxide was built without --features signatures",
            ))
        }
    }

    /// Result of `SigningCredentials::subject()`; errors become `ValueError`.
    #[getter]
    fn subject(&self) -> PyResult<String> {
        let looked_up = self.creds.subject();
        looked_up.map_err(|err| PyValueError::new_err(err.to_string()))
    }

    /// Result of `SigningCredentials::issuer()`; errors become `ValueError`.
    #[getter]
    fn issuer(&self) -> PyResult<String> {
        let looked_up = self.creds.issuer();
        looked_up.map_err(|err| PyValueError::new_err(err.to_string()))
    }

    /// Result of `SigningCredentials::serial()`; errors become `ValueError`.
    #[getter]
    fn serial(&self) -> PyResult<String> {
        let looked_up = self.creds.serial();
        looked_up.map_err(|err| PyValueError::new_err(err.to_string()))
    }

    /// Result of `SigningCredentials::validity()` as a pair of integers;
    /// errors become `ValueError`.
    #[getter]
    fn validity(&self) -> PyResult<(i64, i64)> {
        let looked_up = self.creds.validity();
        looked_up.map_err(|err| PyValueError::new_err(err.to_string()))
    }

    /// Result of `SigningCredentials::is_valid()`; errors become
    /// `ValueError`.
    #[getter]
    fn is_valid(&self) -> PyResult<bool> {
        let looked_up = self.creds.is_valid();
        looked_up.map_err(|err| PyValueError::new_err(err.to_string()))
    }

    /// Debug representation; a subject or serial that cannot be read is
    /// shown as `<unreadable>` instead of raising.
    fn __repr__(&self) -> String {
        let subject = match self.creds.subject() {
            Ok(value) => value,
            Err(_) => "<unreadable>".into(),
        };
        let serial = match self.creds.serial() {
            Ok(value) => value,
            Err(_) => "<unreadable>".into(),
        };
        format!("Certificate(subject={subject:?}, serial={serial:?})")
    }
}
/// Python wrapper around `crate::signatures::Timestamp`.
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "Timestamp")]
pub struct PyTimestamp {
    inner: crate::signatures::Timestamp,
}

#[pymethods]
impl PyTimestamp {
    /// Build a timestamp by passing `data` to `Timestamp::from_der`.
    ///
    /// Raises `ValueError` when `data` is empty or `from_der` rejects it.
    #[staticmethod]
    fn parse(data: &Bound<'_, PyBytes>) -> PyResult<Self> {
        let der = data.as_bytes();
        if der.is_empty() {
            return Err(PyValueError::new_err("Timestamp data must not be empty"));
        }
        match crate::signatures::Timestamp::from_der(der) {
            Ok(inner) => Ok(Self { inner }),
            Err(e) => Err(PyValueError::new_err(format!("Invalid timestamp: {e}"))),
        }
    }

    /// Value returned by the wrapped timestamp's `time()`.
    #[getter]
    fn time(&self) -> i64 {
        let stamp = &self.inner;
        stamp.time()
    }

    /// Value returned by the wrapped timestamp's `serial()`.
    #[getter]
    fn serial(&self) -> String {
        let stamp = &self.inner;
        stamp.serial()
    }

    /// Value returned by the wrapped timestamp's `policy_oid()`.
    #[getter]
    fn policy_oid(&self) -> String {
        let stamp = &self.inner;
        stamp.policy_oid()
    }

    /// Value returned by the wrapped timestamp's `tsa_name()`.
    #[getter]
    fn tsa_name(&self) -> String {
        let stamp = &self.inner;
        stamp.tsa_name()
    }

    /// The wrapped timestamp's `hash_algorithm()` cast to an integer.
    #[getter]
    fn hash_algorithm(&self) -> i32 {
        let algorithm = self.inner.hash_algorithm();
        algorithm as i32
    }

    /// The bytes from `message_imprint_ref()` copied into a Python `bytes`.
    #[getter]
    fn message_imprint(&self, py: Python) -> Py<PyBytes> {
        let imprint = self.inner.message_imprint_ref();
        PyBytes::new(py, imprint).unbind()
    }

    /// Not implemented: always raises `NotImplementedError`.
    fn verify(&self) -> PyResult<bool> {
        let unsupported = PyNotImplementedError::new_err(
            "Timestamp.verify() requires CMS signer verification — not yet landed",
        );
        Err(unsupported)
    }

    /// Debug representation showing time, serial and policy OID.
    fn __repr__(&self) -> String {
        let stamp = &self.inner;
        format!(
            "Timestamp(time={}, serial={:?}, policy_oid={:?})",
            stamp.time(),
            stamp.serial(),
            stamp.policy_oid(),
        )
    }
}
#[pyclass(module = "pdf_oxide.pdf_oxide", name = "Signature")]
pub struct PySignature {
info: crate::signatures::SignatureInfo,
}
#[pymethods]
impl PySignature {
#[getter]
fn signer_name(&self) -> Option<String> {
self.info.signer_name.clone()
}
#[getter]
fn reason(&self) -> Option<String> {
self.info.reason.clone()
}
#[getter]
fn location(&self) -> Option<String> {
self.info.location.clone()
}
#[getter]
fn contact_info(&self) -> Option<String> {
self.info.contact_info.clone()
}
#[getter]
fn signing_time(&self) -> Option<i64> {
self.info
.signing_time
.as_deref()
.and_then(crate::signatures::parse_pdf_date_to_epoch)
}
#[getter]
fn covers_whole_document(&self) -> bool {
self.info.covers_whole_document
}
fn verify(&self) -> PyResult<bool> {
let Some(contents) = self.info.contents() else {
return Err(PyNotImplementedError::new_err(
"Signature has no /Contents blob — nothing to verify",
));
};
match crate::signatures::verify_signer(contents) {
Ok(crate::signatures::SignerVerify::Valid) => Ok(true),
Ok(crate::signatures::SignerVerify::Invalid) => Ok(false),
Ok(crate::signatures::SignerVerify::Unknown) => Err(PyNotImplementedError::new_err(
"Signature.verify(): signer uses RSA-PSS, ECDSA, an unknown \
digest OID, or the CMS blob lacks signed_attrs",
)),
Err(e) => Err(PyValueError::new_err(format!(
"Signature.verify(): failed to parse /Contents as CMS: {e}"
))),
}
}
fn verify_detached(&self, pdf_data: &[u8]) -> PyResult<bool> {
let Some(contents) = self.info.contents() else {
return Err(PyNotImplementedError::new_err(
"Signature has no /Contents blob — nothing to verify",
));
};
let br = self.info.byte_range();
if br.len() != 4 {
return Err(PyValueError::new_err(
"Signature has no /ByteRange — cannot extract signed bytes",
));
}
let byte_range: [i64; 4] = [br[0], br[1], br[2], br[3]];
let signed_bytes =
crate::signatures::ByteRangeCalculator::extract_signed_bytes(pdf_data, &byte_range)
.map_err(|e| {
PyValueError::new_err(format!("Failed to extract signed bytes: {e}"))
})?;
match crate::signatures::verify_signer_detached(contents, &signed_bytes) {
Ok(crate::signatures::SignerVerify::Valid) => Ok(true),
Ok(crate::signatures::SignerVerify::Invalid) => Ok(false),
Ok(crate::signatures::SignerVerify::Unknown) => Err(PyNotImplementedError::new_err(
"Signature.verify_detached(): signer uses RSA-PSS, ECDSA, an \
unknown digest, or the CMS blob lacks signed_attrs / messageDigest",
)),
Err(e) => Err(PyValueError::new_err(format!("Signature.verify_detached(): {e}"))),
}
}
fn __repr__(&self) -> String {
format!(
"Signature(signer_name={:?}, reason={:?}, location={:?})",
self.info.signer_name, self.info.reason, self.info.location,
)
}
}
/// Module initialiser for the `pdf_oxide` extension module.
///
/// Initialises the `pyo3_log` handle, then registers the logging functions,
/// every Python-visible class, and the `VERSION` constant on `m`. Any
/// registration error is propagated to the importer.
#[pymodule(gil_used = false)]
fn pdf_oxide(m: &Bound<'_, PyModule>) -> PyResult<()> {
// Set up the log handle before anything else is registered.
init_pyo3_log_handle();
// Logging control functions.
m.add_function(wrap_pyfunction!(setup_logging, m)?)?;
m.add_function(wrap_pyfunction!(set_log_level, m)?)?;
m.add_function(wrap_pyfunction!(get_log_level, m)?)?;
m.add_function(wrap_pyfunction!(disable_logging, m)?)?;
// Class registrations.
m.add_class::<PyPdfDocument>()?;
m.add_class::<PyPdf>()?;
m.add_class::<PyPdfPage>()?;
m.add_class::<PyPdfText>()?;
m.add_class::<PyPdfTextId>()?;
m.add_class::<PyPdfImage>()?;
m.add_class::<PyPdfElement>()?;
m.add_class::<PyAnnotationWrapper>()?;
m.add_class::<PyTextChar>()?;
m.add_class::<PyTextSpan>()?;
m.add_class::<PyWord>()?;
m.add_class::<PyTextLine>()?;
m.add_class::<PyPdfPageRegion>()?;
m.add_class::<PyDocPage>()?;
m.add_class::<PyDocPageIter>()?;
m.add_class::<PyLayoutParams>()?;
m.add_class::<PyExtractionProfile>()?;
m.add_class::<PyFormField>()?;
m.add_class::<PyOcrEngine>()?;
m.add_class::<PyOcrConfig>()?;
m.add_class::<PyColor>()?;
m.add_class::<PyBlendMode>()?;
m.add_class::<PyExtGState>()?;
m.add_class::<PyDocumentBuilder>()?;
m.add_class::<PyFluentPageBuilder>()?;
m.add_class::<PyEmbeddedFont>()?;
m.add_class::<PyPageTemplate>()?;
m.add_class::<PyArtifact>()?;
m.add_class::<PyHeader>()?;
m.add_class::<PyFooter>()?;
m.add_class::<PyArtifactStyle>()?;
m.add_class::<PyLinearGradient>()?;
m.add_class::<PyRadialGradient>()?;
m.add_class::<PyLineCap>()?;
m.add_class::<PyLineJoin>()?;
m.add_class::<PyPatternPresets>()?;
m.add_class::<PyOfficeConverter>()?;
m.add_class::<PySignature>()?;
m.add_class::<PyCertificate>()?;
m.add_class::<PyTimestamp>()?;
m.add_class::<PyTsaClient>()?;
// Crate version taken from Cargo at compile time.
m.add("VERSION", env!("CARGO_PKG_VERSION"))?;
Ok(())
}