use crate::types::{Anchor, ProjectedLine};
use super::inline::{SpanStyle, line_all_bold, line_uniform_style, render_line_inline};
/// Largest vertical gap between two consecutive lines, expressed as a multiple of
/// the taller of their bounding-box heights (floored at 1.0), at which
/// `continues_heading` / `continues_paragraph` still accept the pair.
const PARAGRAPH_GAP_MULTIPLIER: f32 = 1.5;
/// Allowed absolute difference in `dominant_font_size` between two lines when at
/// least one of them has `font_size_is_estimated` set.
const FONT_SIZE_PARAGRAPH_TOLERANCE: f32 = 1.5;
/// Tighter allowed font-size difference, used when neither line's size is estimated.
const FONT_SIZE_PARAGRAPH_TOLERANCE_REAL: f32 = 0.5;
/// Picks the font-size tolerance to use when comparing `prev` and `cur`.
///
/// Returns the looser [`FONT_SIZE_PARAGRAPH_TOLERANCE`] as soon as either line's
/// font size is flagged as estimated, and the stricter
/// [`FONT_SIZE_PARAGRAPH_TOLERANCE_REAL`] only when both sizes are not estimated.
fn font_size_paragraph_tolerance(prev: &ProjectedLine, cur: &ProjectedLine) -> f32 {
    match (prev.font_size_is_estimated, cur.font_size_is_estimated) {
        (false, false) => FONT_SIZE_PARAGRAPH_TOLERANCE_REAL,
        _ => FONT_SIZE_PARAGRAPH_TOLERANCE,
    }
}
/// How much further right (in `indent_x` units) a left-anchored line may start than
/// the previous line before `continues_paragraph` rejects it as a continuation.
const INDENT_TOLERANCE: f32 = 6.0;
/// Normalizes whitespace in `s`.
///
/// Leading and trailing whitespace is dropped and every interior run of
/// whitespace characters (as defined by `char::is_whitespace`) is replaced by a
/// single ASCII space. Returns an empty string when `s` contains no
/// non-whitespace characters.
pub(super) fn collapse_whitespace(s: &str) -> String {
    let mut collapsed = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so a non-empty buffer means a separator is due.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
/// Reports whether `text`, ignoring trailing whitespace, ends in an ASCII hyphen
/// that directly follows an alphabetic character (e.g. `"co-"`).
///
/// A lone `"-"`, a hyphen after a digit, space, or another hyphen all yield `false`.
pub(super) fn ends_hyphenated(text: &str) -> bool {
    text.trim_end()
        .strip_suffix('-')
        .and_then(|stem| stem.chars().next_back())
        .is_some_and(|before_hyphen| before_hyphen.is_alphabetic())
}
/// Reports whether `text` ends with sentence-final punctuation (`.`, `!` or `?`).
///
/// Trailing whitespace is removed first, then any run of closing quotes or
/// brackets (`"`, `'`, `)`, `]`, `»`, `”`, `’`), and only then is the last
/// remaining character inspected.
pub(super) fn ends_sentence_final(text: &str) -> bool {
    const CLOSERS: &[char] = &['"', '\'', ')', ']', '»', '”', '’'];
    const TERMINATORS: &[char] = &['.', '!', '?'];

    text.trim_end()
        .trim_end_matches(CLOSERS)
        .ends_with(TERMINATORS)
}
/// Reports whether the hyphen ending `prev` looks like a line-break hyphen that
/// should be removed when `next` is joined onto it.
///
/// That is the case when `prev` satisfies [`ends_hyphenated`] and the first
/// non-whitespace character of `next` is lowercase. ("Soft" here refers to the
/// break heuristic, not to the U+00AD character.)
pub(super) fn is_soft_hyphen_break(prev: &str, next: &str) -> bool {
    if !ends_hyphenated(prev) {
        return false;
    }
    matches!(
        next.trim_start().chars().next(),
        Some(first) if first.is_lowercase()
    )
}
/// Appends `to_append` to `prev`, undoing a line-break hyphen where one is detected.
///
/// * `check` is the text used to decide how to join; when it is empty nothing
///   is appended at all.
/// * `to_append` is the text actually written; it may differ from `check`
///   (callers pass a rendered variant of the same line).
///
/// If `prev` is empty, `to_append` is copied in unchanged. If
/// [`is_soft_hyphen_break`] holds for `(prev, check)`, trailing whitespace and the
/// hyphen are removed from `prev` and `to_append` is appended without its leading
/// whitespace. Otherwise the two are joined with a single space.
pub(super) fn dehyphenate_join(prev: &mut String, check: &str, to_append: &str) {
    if check.is_empty() {
        return;
    }
    if prev.is_empty() {
        prev.push_str(to_append);
    } else if is_soft_hyphen_break(prev, check) {
        // The trimmed text is known to end in an ASCII '-', which is one byte,
        // so cutting one byte before the trimmed length drops exactly the hyphen
        // together with any whitespace that followed it.
        let hyphen_at = prev.trim_end().len() - 1;
        prev.truncate(hyphen_at);
        prev.push_str(to_append.trim_start());
    } else {
        prev.push(' ');
        prev.push_str(to_append);
    }
}
/// Style checks shared by [`continues_heading`] and [`continues_paragraph`].
///
/// Returns `false` when any of the following holds, in this order:
/// exactly one of the two lines is centre-anchored; their dominant font sizes
/// differ by more than [`font_size_paragraph_tolerance`]; both lines have a uniform
/// style and those styles disagree on boldness; `line_all_bold` differs.
fn same_block_style(prev: &ProjectedLine, cur: &ProjectedLine) -> bool {
    if (prev.anchor == Anchor::Center) != (cur.anchor == Anchor::Center) {
        return false;
    }

    let size_delta = (prev.dominant_font_size - cur.dominant_font_size).abs();
    if size_delta > font_size_paragraph_tolerance(prev, cur) {
        return false;
    }

    // Only comparable when both lines report a single uniform style.
    let uniform_bold_differs = line_uniform_style(prev)
        .zip(line_uniform_style(cur))
        .is_some_and(|(p, c)| p.bold != c.bold);
    if uniform_bold_differs {
        return false;
    }

    if line_all_bold(prev) != line_all_bold(cur) {
        return false;
    }
    true
}

/// Vertical-proximity check shared by [`continues_heading`] and
/// [`continues_paragraph`].
///
/// The gap is measured from `prev`'s `bbox.y + bbox.height` to `cur`'s `bbox.y`
/// and must not exceed [`PARAGRAPH_GAP_MULTIPLIER`] times the taller of the two
/// bounding boxes (with the height floored at 1.0 so degenerate boxes still get
/// a usable threshold). Negative gaps (overlapping boxes) are accepted.
// NOTE(review): this presumes `bbox.y` grows downward — confirm against the
// projector's coordinate system.
fn within_paragraph_gap(prev: &ProjectedLine, cur: &ProjectedLine) -> bool {
    let prev_bottom = prev.bbox.y + prev.bbox.height;
    let gap = cur.bbox.y - prev_bottom;
    let line_height = prev.bbox.height.max(cur.bbox.height).max(1.0);
    gap <= line_height * PARAGRAPH_GAP_MULTIPLIER
}

/// Decides whether `cur` continues the heading that `prev` belongs to.
///
/// Both lines must pass the shared style checks (centering, font size, boldness),
/// share the same `region_path`, and be vertically close enough.
pub(super) fn continues_heading(prev: &ProjectedLine, cur: &ProjectedLine) -> bool {
    if !same_block_style(prev, cur) {
        return false;
    }
    // Unlike paragraphs, a heading is never continued across regions.
    if prev.region_path != cur.region_path {
        return false;
    }
    within_paragraph_gap(prev, cur)
}

/// Decides whether `cur` continues the paragraph that `prev` belongs to.
///
/// Both lines must pass the shared style checks (centering, font size, boldness).
/// After that:
///
/// * If the lines are in different regions, the decision is purely textual:
///   `prev` must not end in closing/terminal punctuation and `cur` must start
///   with a lowercase letter. Indent and vertical gap are not consulted in that
///   case.
/// * Within one region, a left-anchored `cur` that starts more than
///   [`INDENT_TOLERANCE`] to the right of `prev` is rejected; otherwise the lines
///   must be vertically close enough.
pub(super) fn continues_paragraph(prev: &ProjectedLine, cur: &ProjectedLine) -> bool {
    if !same_block_style(prev, cur) {
        return false;
    }

    if prev.region_path != cur.region_path {
        let ends_open = !prev.text.trim_end().ends_with(|c: char| {
            matches!(
                c,
                '.' | '!' | '?' | ':' | ';' | '”' | '"' | ')' | ']' | '。' | '』' | '」'
            )
        });
        let starts_lower = cur
            .text
            .trim_start()
            .chars()
            .next()
            .is_some_and(|c| c.is_lowercase());
        return ends_open && starts_lower;
    }

    // A deeper left indent signals a new paragraph. The former outer
    // `abs() > INDENT_TOLERANCE` pre-check was implied by this comparison
    // (up to float rounding exactly at the boundary), so it has been folded in.
    if cur.anchor == Anchor::Left && cur.indent_x > prev.indent_x + INDENT_TOLERANCE {
        return false;
    }

    within_paragraph_gap(prev, cur)
}
/// Running state for a paragraph that is being assembled one line at a time by
/// [`append_to_paragraph`].
pub(super) struct ParaAccum {
/// Plain text gathered so far: each line is trimmed and whitespace-collapsed,
/// then joined onto this buffer with `dehyphenate_join`.
pub(super) raw: String,
/// The same paragraph built from each line's `render_line_inline` output.
pub(super) inline: String,
/// Clone of the most recent non-blank line that was appended.
pub(super) last: ProjectedLine,
/// `Some((bold, italic))` while every appended line has reported the same uniform
/// style (styles with `strike` set are treated as "no uniform style"); becomes
/// `None` once a line differs or has no uniform style.
pub(super) uniform: Option<(bool, bool)>,
}
/// Folds `next_line` into the paragraph accumulated in `accum`.
///
/// Lines whose trimmed, whitespace-collapsed text is empty are ignored entirely
/// (not even `accum.last` is updated). Otherwise:
///
/// * `accum.raw` receives the collapsed plain text and `accum.inline` the output
///   of `render_line_inline`; for a non-empty accumulator both are joined through
///   [`dehyphenate_join`], using the plain text as the `check` argument.
/// * `accum.uniform` is seeded from the first line's uniform `(bold, italic)`
///   style and afterwards kept only while each new line reports the same pair;
///   a style with `strike` set counts as having no uniform style.
/// * `accum.last` becomes a clone of `next_line`.
// NOTE(review): for `accum.inline` the hyphen test runs against the accumulated
// inline text, so if `render_line_inline` can end a line with markup after the
// hyphen, `raw` and `inline` may be joined differently — confirm.
pub(super) fn append_to_paragraph(accum: &mut ParaAccum, next_line: &ProjectedLine) {
    let raw_piece = collapse_whitespace(next_line.text.trim());
    if raw_piece.is_empty() {
        return;
    }

    let inline_piece = render_line_inline(next_line);
    let line_style: Option<SpanStyle> = line_uniform_style(next_line).filter(|s| !s.strike);
    let line_flags = line_style.map(|s| (s.bold, s.italic));

    if accum.raw.is_empty() {
        // First real line: take its text and style as the starting point.
        accum.raw.push_str(&raw_piece);
        accum.inline.push_str(&inline_piece);
        accum.uniform = line_flags;
    } else {
        dehyphenate_join(&mut accum.raw, &raw_piece, &raw_piece);
        dehyphenate_join(&mut accum.inline, &raw_piece, &inline_piece);
        // Keep the paragraph-wide style only if this line agrees with it.
        accum.uniform = accum.uniform.filter(|seen| Some(*seen) == line_flags);
    }

    accum.last = next_line.clone();
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A trailing hyphen is removed only when the continuation starts with a
    /// lowercase letter; otherwise the pieces are joined with a space and the
    /// hyphen is kept.
    #[test]
    fn dehyphenate_join_only_strips_before_lowercase() {
        // (accumulated text, continuation, expected result)
        let cases = [
            ("co-", "operate", "cooperate"),
            ("Vitamin-", "A", "Vitamin- A"),
        ];
        for (head, tail, expected) in cases {
            let mut joined = String::from(head);
            dehyphenate_join(&mut joined, tail, tail);
            assert_eq!(joined, expected);
        }
    }
}