#![allow(unused)]
use crate::bindgen::FPDF_PAGEOBJECT;
use crate::error::PdfiumError;
use crate::pdf::document::fonts::PdfFontToken;
use crate::pdf::document::page::object::group::PdfPageGroupObject;
use crate::pdf::document::page::object::private::internal::PdfPageObjectPrivate;
use crate::pdf::document::page::object::text::PdfPageTextObject;
use crate::pdf::document::page::object::{PdfPageObject, PdfPageObjectCommon};
use crate::pdf::document::page::objects::common::PdfPageObjectsCommon;
use crate::pdf::document::page::PdfPage;
use crate::pdf::document::PdfDocument;
use crate::pdf::font::PdfFont;
use crate::pdf::points::PdfPoints;
use itertools::Itertools;
use maybe_owned::MaybeOwned;
use std::cmp::Ordering;
/// A run of text paired with the font and font size used to render it.
///
/// The font is stored as a [`MaybeOwned`], so a styled string can either borrow a font
/// supplied by the caller or own a font value retrieved from an existing text object.
pub struct PdfStyledString<'a> {
// The text content of this run.
text: String,
// The font of this run; borrowed or owned depending on which constructor was used.
font: MaybeOwned<'a, PdfFont<'a>>,
// The font size of this run, expressed in points.
font_size: PdfPoints,
}
impl<'a> PdfStyledString<'a> {
    /// Creates a new [`PdfStyledString`] from the given text, borrowing the given font
    /// for the lifetime of the string.
    #[inline]
    pub fn new(text: String, font: &'a PdfFont<'a>, font_size: PdfPoints) -> Self {
        PdfStyledString {
            text,
            font: MaybeOwned::Borrowed(font),
            font_size,
        }
    }

    /// Creates a new [`PdfStyledString`] that copies the text of the given text object
    /// and takes its font and unscaled font size as styling.
    #[inline]
    pub fn from_text_object(text_object: &'a PdfPageTextObject<'a>) -> Self {
        PdfStyledString {
            text: text_object.text(),
            font: MaybeOwned::Owned(text_object.font()),
            font_size: text_object.unscaled_font_size(),
        }
    }

    /// Appends the given text to this string, first appending `separator` unless the
    /// existing text already ends with it.
    ///
    /// An empty separator is never appended, since every string ends with the empty string.
    #[inline]
    pub(crate) fn push(&mut self, text: impl ToString, separator: &str) {
        if !self.text.ends_with(separator) {
            self.text.push_str(separator);
        }

        self.text.push_str(text.to_string().as_str());
    }

    /// Returns the text of this styled string.
    #[inline]
    pub fn text(&self) -> &str {
        self.text.as_str()
    }

    /// Returns the font of this styled string.
    #[inline]
    pub fn font(&self) -> &PdfFont<'_> {
        self.font.as_ref()
    }

    /// Returns the font size of this styled string.
    #[inline]
    pub fn font_size(&self) -> PdfPoints {
        self.font_size
    }

    /// Returns `true` if this string and the given string share the same font size,
    /// font handle, and font family name.
    #[inline]
    pub fn does_match_string_styling(&self, other: &PdfStyledString) -> bool {
        self.does_match_raw_styling(other.font_size(), other.font())
    }

    /// Returns `true` if this string and the given text object share the same
    /// (unscaled) font size, font handle, and font family name.
    #[inline]
    pub fn does_match_object_styling(&self, other: &PdfPageTextObject) -> bool {
        self.does_match_raw_styling(other.unscaled_font_size(), &other.font())
    }

    /// Compares this string's styling against a raw font size and font.
    ///
    /// The checks run from cheapest to most expensive and short-circuit, so the family
    /// names are only retrieved when the sizes and handles already agree. Two fonts whose
    /// family names are both empty are considered to match.
    fn does_match_raw_styling(&self, other_font_size: PdfPoints, other_font: &PdfFont) -> bool {
        if self.font_size() != other_font_size {
            return false;
        }

        let this_font = self.font();

        this_font.handle() == other_font.handle() && this_font.family() == other_font.family()
    }

    /// Creates a new text object in the given document from the text, font, and font size
    /// of this styled string.
    ///
    /// # Errors
    ///
    /// Returns whatever error [`PdfPageTextObject::new`] reports.
    #[inline]
    pub fn as_text_object(
        &self,
        document: &'a PdfDocument<'a>,
    ) -> Result<PdfPageTextObject<'a>, PdfiumError> {
        PdfPageTextObject::new(document, self.text(), self.font(), self.font_size())
    }
}
/// A single building block of a paragraph or line.
enum PdfParagraphFragment<'a> {
// A run of text sharing one font and font size.
StyledString(PdfStyledString<'a>),
// A break between lines. `PdfParagraph::from_objects()` stores the alignment of the
// line that was being built when the break was inserted.
LineBreak(PdfLineAlignment),
// The raw handle of a page object that is not a text object.
NonTextObject(FPDF_PAGEOBJECT),
}
/// The overflow setting stored on a [`PdfParagraph`].
///
/// NOTE(review): no layout code in this file consumes this setting yet, so the variant
/// descriptions below are inferred from the variant names only; confirm once paragraph
/// layout is implemented.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum PdfParagraphOverflowBehaviour {
// Presumably: keep the paragraph height fixed and let the width grow; TODO confirm.
FixHeightExpandWidth,
// Presumably: keep the paragraph width fixed and let the height grow; TODO confirm.
// This is the setting assigned to paragraphs built by `PdfParagraph::from_objects()`.
FixWidthExpandHeight,
// Presumably: discard content that does not fit; TODO confirm.
Clip,
}
/// The horizontal alignment stored on a [`PdfParagraph`].
///
/// For paragraphs built by `PdfParagraph::from_objects()`, the value is derived from
/// the alignments guessed for the paragraph's first and last lines.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum PdfParagraphAlignment {
// Chosen when the first line was classified as left-aligned, or was not classified at all.
LeftAlign,
// Chosen when the first line was classified as right-aligned.
RightAlign,
// Chosen when the first line was classified as centered.
Center,
// Chosen when the first line was classified as justified but the last line was not.
Justify,
// Chosen when both the first line and the last line were classified as justified.
ForceJustify,
}
/// The horizontal alignment guessed for a single line, based on how closely its left and
/// right edges match a pair of reference edges (see `PdfParagraph::guess_line_alignment()`).
#[derive(Copy, Clone, Debug, PartialEq)]
enum PdfLineAlignment {
// No alignment has been guessed yet. Used as the initial value before any line has
// been classified; converted to a left-aligned paragraph alignment.
None,
// The left edge matches the reference left edge, but the right edge does not.
LeftAlign,
// The right edge matches the reference right edge, but the left edge does not.
RightAlign,
// Neither edge matches its reference edge.
Center,
// Both edges match their reference edges.
Justify,
}
/// A run of fragments sharing a single guessed alignment, together with its position.
struct PdfLine<'a> {
// The alignment guessed for this line.
alignment: PdfLineAlignment,
// The bottom edge of this line.
bottom: PdfPoints,
// The left edge of this line.
left: PdfPoints,
// The horizontal extent of this line, measured from `left`.
width: PdfPoints,
// The fragments making up this line, in reading order.
fragments: Vec<PdfParagraphFragment<'a>>,
}
impl<'a> PdfLine<'a> {
#[inline]
fn new(
alignment: PdfLineAlignment,
bottom: PdfPoints,
left: PdfPoints,
width: PdfPoints,
fragments: Vec<PdfParagraphFragment<'a>>,
) -> Self {
PdfLine {
alignment,
bottom,
left,
width,
fragments,
}
}
}
/// A group of text fragments, line breaks, and non-text objects that together form one
/// paragraph, along with the paragraph's position, size limits, and alignment.
#[doc(hidden)]
pub struct PdfParagraph<'a> {
// The fragments of this paragraph, in reading order.
fragments: Vec<PdfParagraphFragment<'a>>,
// The bottom edge of this paragraph, if it has been positioned.
bottom: Option<PdfPoints>,
// The left edge of this paragraph, if it has been positioned.
left: Option<PdfPoints>,
// The maximum width of this paragraph, if any.
max_width: Option<PdfPoints>,
// The maximum height of this paragraph, if any.
max_height: Option<PdfPoints>,
// The overflow setting of this paragraph.
overflow: PdfParagraphOverflowBehaviour,
// The horizontal alignment of this paragraph.
alignment: PdfParagraphAlignment,
// The offset of the first line's left edge from the paragraph's left edge.
first_line_indent: PdfPoints,
}
impl<'a> PdfParagraph<'a> {
pub fn from_objects(objects: &'a [PdfPageObject<'a>]) -> Vec<PdfParagraph<'a>> {
let mut lines = Vec::new();
let mut current_line_fragments = Vec::new();
let mut objects_bottom = None;
let mut objects_top = None;
let mut objects_left = None;
let mut objects_right = None;
let positioned_objects = objects
.iter()
.map(|object| {
let object_bottom = object
.bounds()
.map(|bounds| bounds.bottom())
.unwrap_or(PdfPoints::ZERO);
match objects_bottom {
Some(paragraph_bottom) => {
if paragraph_bottom > object_bottom {
objects_bottom = Some(object_bottom);
}
}
None => objects_bottom = Some(object_bottom),
};
let object_top = object
.bounds()
.map(|bounds| bounds.top())
.unwrap_or(PdfPoints::ZERO);
match objects_top {
Some(paragraph_top) => {
if paragraph_top < object_top {
objects_top = Some(object_top);
}
}
None => objects_top = Some(object_top),
}
let object_height = object
.bounds()
.map(|bounds| bounds.height())
.unwrap_or(PdfPoints::ZERO);
let object_left = object
.bounds()
.map(|bounds| bounds.left())
.unwrap_or(PdfPoints::ZERO);
match objects_left {
Some(paragraph_left) => {
if paragraph_left > object_left {
objects_left = Some(object_left);
}
}
None => objects_left = Some(object_left),
}
let object_right = object
.bounds()
.map(|bounds| bounds.right())
.unwrap_or(PdfPoints::ZERO);
match objects_right {
Some(paragraph_right) => {
if paragraph_right < object_right {
objects_right = Some(object_right);
}
}
None => objects_right = Some(object_right),
}
(object_bottom, object_top, object_left, object_right, object)
})
.sorted_by(|a, b| {
let (a_bottom, a_top, _, a_right) = (a.0, a.1, a.2, a.3);
let (b_bottom, b_top, b_left, _) = (b.0, b.1, b.2, b.3);
if b_top > a_bottom {
Ordering::Greater
} else if b_top < a_bottom {
Ordering::Less
} else if a_right < b_left {
Ordering::Less
} else {
Ordering::Greater
}
})
.collect::<Vec<_>>();
let paragraph_left = objects_left.unwrap_or(PdfPoints::ZERO);
let paragraph_right = objects_right.unwrap_or(paragraph_left);
let mut current_line_bottom = PdfPoints::ZERO;
let mut current_line_left = PdfPoints::ZERO;
let mut current_line_right = PdfPoints::ZERO;
let mut current_line_alignment = PdfLineAlignment::None;
let mut last_object_bottom = None;
let mut last_object_height = None;
let mut last_object_left = None;
let mut last_object_right = None;
let mut last_object_width = None;
for (bottom, top, left, right, object) in positioned_objects.iter() {
let top = *top;
let bottom = *bottom;
let left = *left;
let right = *right;
if last_object_left.is_none() || left < last_object_left.unwrap() {
let next_line_alignment = Self::guess_line_alignment(
last_object_left,
last_object_right,
left,
right,
paragraph_left,
paragraph_right,
);
if next_line_alignment != current_line_alignment
|| last_object_bottom.unwrap_or(PdfPoints::ZERO)
- last_object_height.unwrap_or(PdfPoints::ZERO)
> top
{
println!(
"starting a new line with alignment {:?}",
next_line_alignment
);
lines.push(PdfLine::new(
current_line_alignment,
current_line_bottom,
current_line_left,
right - current_line_left,
current_line_fragments,
));
current_line_fragments =
vec![PdfParagraphFragment::LineBreak(current_line_alignment)];
current_line_left = left;
current_line_bottom = bottom;
current_line_alignment = next_line_alignment;
} else {
println!("carriage return");
}
}
last_object_left = Some(left);
last_object_right = Some(right);
last_object_width = Some(right - left);
last_object_bottom = Some(bottom);
last_object_height = Some(top - bottom);
if let Some(object) = object.as_text_object() {
if let Some(PdfParagraphFragment::StyledString(last_string)) =
current_line_fragments.last_mut()
{
if last_string.does_match_object_styling(object) {
let separator = if let Ok(bounds) = object.bounds() {
if let Some(last_object_right) = last_object_right {
if last_object_right > bounds.left() {
""
} else {
" "
}
} else {
""
}
} else {
" "
};
println!(
"styling matches, push \"{}\" onto \"{}\", separated by \"{}\"",
object.text(),
last_string.text(),
separator
);
last_string.push(object.text(), separator);
println!(
"last_object_right = {:?}, this object left = {:?}",
last_object_right,
object.bounds().unwrap().left(),
);
} else {
println!(
"styling differs, start new fragment with \"{}\"",
object.text()
);
current_line_fragments.push(PdfParagraphFragment::StyledString(
PdfStyledString::from_text_object(object),
));
}
} else {
println!("start new text fragment with \"{}\"", object.text());
current_line_fragments.push(PdfParagraphFragment::StyledString(
PdfStyledString::from_text_object(object),
));
}
} else {
current_line_fragments
.push(PdfParagraphFragment::NonTextObject(object.object_handle()));
}
}
lines.push(PdfLine::new(
current_line_alignment,
current_line_bottom,
current_line_left,
current_line_right - current_line_left,
current_line_fragments,
));
let mut paragraphs = Vec::new();
let mut current_paragraph_fragments = Vec::new();
let mut current_paragraph_bottom = None;
let mut current_paragraph_left = None;
let mut current_paragraph_right = None;
let mut current_paragraph_first_line_left = None;
let mut last_line_alignment = lines
.first()
.map(|line| line.alignment)
.unwrap_or(PdfLineAlignment::None);
let mut first_line_alignment = last_line_alignment;
for mut line in lines.drain(..) {
println!("********* got line: {:?}", line.alignment);
if line.alignment != last_line_alignment {
if !current_paragraph_fragments.is_empty() {
paragraphs.push(Self::paragraph_from_lines(
current_paragraph_fragments,
current_paragraph_bottom,
current_paragraph_left,
current_paragraph_right,
current_paragraph_first_line_left,
first_line_alignment,
last_line_alignment,
));
current_paragraph_fragments = Vec::new();
current_paragraph_bottom = None;
current_paragraph_left = None;
current_paragraph_right = None;
current_paragraph_first_line_left = None;
first_line_alignment = last_line_alignment
}
}
current_paragraph_fragments.append(&mut line.fragments);
last_line_alignment = line.alignment;
if let Some(paragraph_left) = current_paragraph_left {
if line.left < paragraph_left {
current_paragraph_left = Some(line.left);
}
} else {
current_paragraph_left = Some(line.left);
}
if let Some(paragraph_right) = current_paragraph_right {
if line.left + line.width > paragraph_right {
current_paragraph_right = Some(line.left + line.width);
}
} else {
current_paragraph_right = Some(line.left + line.width);
}
if let Some(paragraph_bottom) = current_paragraph_bottom {
if line.bottom < paragraph_bottom {
current_paragraph_bottom = Some(line.bottom);
}
} else {
current_paragraph_bottom = Some(line.bottom);
}
if current_paragraph_first_line_left.is_none() {
current_paragraph_first_line_left = Some(line.left);
}
}
paragraphs.push(Self::paragraph_from_lines(
current_paragraph_fragments,
current_paragraph_bottom,
current_paragraph_left,
current_paragraph_right,
current_paragraph_first_line_left,
first_line_alignment,
last_line_alignment,
));
paragraphs
}
fn paragraph_from_lines(
fragments: Vec<PdfParagraphFragment<'a>>,
bottom: Option<PdfPoints>,
left: Option<PdfPoints>,
right: Option<PdfPoints>,
first_line_left: Option<PdfPoints>,
first_line_alignment: PdfLineAlignment,
last_line_alignment: PdfLineAlignment,
) -> PdfParagraph<'a> {
PdfParagraph {
fragments,
bottom,
left,
max_width: match (left, right) {
(Some(left), Some(right)) => Some(right - left),
_ => None,
},
max_height: None,
overflow: PdfParagraphOverflowBehaviour::FixWidthExpandHeight,
alignment: if first_line_alignment == last_line_alignment
&& first_line_alignment == PdfLineAlignment::Justify
{
PdfParagraphAlignment::ForceJustify
} else {
match first_line_alignment {
PdfLineAlignment::None | PdfLineAlignment::LeftAlign => {
PdfParagraphAlignment::LeftAlign
}
PdfLineAlignment::RightAlign => PdfParagraphAlignment::RightAlign,
PdfLineAlignment::Center => PdfParagraphAlignment::Center,
PdfLineAlignment::Justify => PdfParagraphAlignment::Justify,
}
},
first_line_indent: match (first_line_left, left) {
(Some(first_line_left), Some(left)) => first_line_left - left,
_ => PdfPoints::ZERO,
},
}
}
fn guess_line_alignment(
previous_line_left: Option<PdfPoints>,
previous_line_right: Option<PdfPoints>,
line_left: PdfPoints,
line_right: PdfPoints,
paragraph_left: PdfPoints,
paragraph_right: PdfPoints,
) -> PdfLineAlignment {
const ALIGNMENT_THRESHOLD: f32 = 2.0;
if let (Some(previous_line_left), Some(previous_line_right)) =
(previous_line_left, previous_line_right)
{
let is_aligned_left =
(previous_line_left.value - line_left.value).abs() < ALIGNMENT_THRESHOLD;
let is_aligned_right =
(previous_line_right.value - line_right.value).abs() < ALIGNMENT_THRESHOLD;
match (is_aligned_left, is_aligned_right) {
(true, true) => PdfLineAlignment::Justify,
(true, false) => PdfLineAlignment::LeftAlign,
(false, true) => PdfLineAlignment::RightAlign,
(false, false) => PdfLineAlignment::Center,
}
} else {
let is_aligned_left =
(paragraph_left.value - line_left.value).abs() < ALIGNMENT_THRESHOLD;
let is_aligned_right =
(paragraph_right.value - line_right.value).abs() < ALIGNMENT_THRESHOLD;
match (is_aligned_left, is_aligned_right) {
(true, true) => PdfLineAlignment::Justify,
(true, false) => PdfLineAlignment::LeftAlign,
(false, true) => PdfLineAlignment::RightAlign,
(false, false) => PdfLineAlignment::Center,
}
}
}
#[inline]
pub fn empty(
maximum_width: PdfPoints,
overflow: PdfParagraphOverflowBehaviour,
alignment: PdfParagraphAlignment,
) -> Self {
PdfParagraph {
fragments: vec![],
bottom: None,
left: None,
max_width: Some(maximum_width),
max_height: None,
overflow,
alignment,
first_line_indent: PdfPoints::ZERO,
}
}
#[inline]
pub fn is_empty(&self) -> bool {
self.fragments.is_empty()
}
#[inline]
pub fn push(&mut self, string: PdfStyledString<'a>) {
if let Some(PdfParagraphFragment::StyledString(last_string)) = self.fragments.last_mut() {
if last_string.does_match_string_styling(&string) {
last_string.push(string.text(), " ");
} else {
self.fragments
.push(PdfParagraphFragment::StyledString(string));
}
} else {
self.fragments
.push(PdfParagraphFragment::StyledString(string));
}
}
#[inline]
pub fn maximum_width(&self) -> PdfPoints {
self.max_width.unwrap_or(PdfPoints::ZERO)
}
#[inline]
pub fn set_maximum_width(&mut self, width: PdfPoints) {
self.max_width = Some(width);
}
#[inline]
pub fn set_maximum_height(&mut self, height: PdfPoints) {
self.max_height = Some(height);
}
#[inline]
pub fn text(&self) -> String {
println!(">>>> text(): fragments count = {}", self.fragments.len());
self.fragments
.iter()
.filter_map(|fragment| match fragment {
PdfParagraphFragment::StyledString(ref string) => Some(string.text.as_str()),
PdfParagraphFragment::LineBreak(_) => Some("\n"),
_ => None,
})
.collect::<Vec<_>>()
.join("")
}
pub fn text_separated(&self, separator: &str) -> String {
self.fragments
.iter()
.filter_map(|fragment| match fragment {
PdfParagraphFragment::StyledString(ref string) => Some(string.text.as_str()),
_ => None,
})
.collect::<Vec<_>>()
.join(separator)
}
fn to_lines(&self) -> Vec<PdfLine<'_>> {
todo!()
}
pub fn as_group(&self) -> PdfPageGroupObject<'_> {
todo!()
}
pub fn d(&self) {
for (index, f) in self.fragments.iter().enumerate() {
match f {
PdfParagraphFragment::StyledString(s) => {
println!("{}: {}", index, s.text());
}
PdfParagraphFragment::LineBreak(_) => {
println!("{}: line break", index);
}
PdfParagraphFragment::NonTextObject(_) => {
println!("{}: not a text object", index);
}
}
}
}
}
#[cfg(test)]
mod tests {
    use crate::pdf::document::page::paragraph::PdfParagraph;
    use crate::prelude::*;
    use crate::utils::test::test_bind_to_pdfium;

    /// Smoke test: builds paragraphs from every object on the first page of the sample
    /// document and dumps them to standard output. There are no assertions; the test
    /// only fails if loading the document fails or paragraph construction panics.
    #[test]
    fn test_paragraph_construction() -> Result<(), PdfiumError> {
        let pdfium = test_bind_to_pdfium();
        let document = pdfium.load_pdf_from_file("./test/text-test.pdf", None)?;
        let first_page = document.pages().get(0)?;

        let page_objects: Vec<_> = first_page.objects().iter().collect();

        for paragraph in PdfParagraph::from_objects(page_objects.as_slice()).iter() {
            paragraph.d();
        }

        Ok(())
    }
}