use crate::config::ImageMode;
use crate::markdown_layout::{
build_heading_map, classify_page_with_filters, compute_body_size, compute_header_footer_set,
detect_single_page_chrome, render_blocks,
};
use crate::types::{OutlineTarget, ParsedPage};
pub fn format_markdown(
pages: &[ParsedPage],
outline: &[OutlineTarget],
image_mode: ImageMode,
) -> String {
if pages.is_empty() {
return String::new();
}
let body_size = compute_body_size(pages);
let heading_map = build_heading_map(pages, body_size);
let header_footer = compute_header_footer_set(pages);
let mut out = String::new();
for (i, page) in pages.iter().enumerate() {
if i > 0 {
out.push_str("\n\n-----\n\n");
}
if page.projected_lines.is_empty() {
out.push_str("```text\n");
out.push_str(&page.text);
if !page.text.ends_with('\n') {
out.push('\n');
}
out.push_str("```");
continue;
}
let target_index = (page.page_number as i32).saturating_sub(1);
let page_outline: Vec<OutlineTarget> = outline
.iter()
.filter(|e| e.page_index == target_index)
.cloned()
.collect();
let chrome_indices = detect_single_page_chrome(page, body_size);
let mut blocks = classify_page_with_filters(
page,
&heading_map,
&header_footer,
&page_outline,
image_mode,
&chrome_indices,
);
let has_content = blocks.iter().any(|b| {
!matches!(
b,
crate::markdown_layout::Block::HorizontalRule
| crate::markdown_layout::Block::Figure { .. }
)
});
if !has_content {
blocks.retain(|b| !matches!(b, crate::markdown_layout::Block::HorizontalRule));
}
dedupe_rules(&mut blocks);
out.push_str(&render_blocks(&blocks));
}
out
}
fn dedupe_rules(blocks: &mut Vec<crate::markdown_layout::Block>) {
use crate::markdown_layout::Block::HorizontalRule;
while matches!(blocks.first(), Some(HorizontalRule)) {
blocks.remove(0);
}
while matches!(blocks.last(), Some(HorizontalRule)) {
blocks.pop();
}
blocks.dedup_by(|a, b| matches!((a, b), (HorizontalRule, HorizontalRule)));
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::{Anchor, ProjectedLine, Rect, TextItem};
fn line(text: &str, x: f32, y: f32, h: f32, size: f32) -> ProjectedLine {
ProjectedLine {
text: text.into(),
bbox: Rect {
x,
y,
width: text.chars().count() as f32 * (size * 0.5),
height: h,
},
anchor: Anchor::Left,
indent_x: x,
dominant_font_size: size,
font_size_is_estimated: false,
heading_font_size: None,
dominant_font_name: Some("Arial".into()),
all_bold: false,
all_italic: false,
all_mono: false,
all_strike: false,
spans: vec![TextItem::default()],
region_path: Vec::new(),
mcid: None,
in_figure: false,
}
}
fn page_with(n: usize, lines: Vec<ProjectedLine>) -> ParsedPage {
ParsedPage {
page_number: n,
page_width: 612.0,
page_height: 792.0,
text: "fallback".into(),
text_items: vec![],
projected_lines: lines,
regions: crate::types::Region::default(),
graphics: vec![],
figures: vec![],
struct_nodes: vec![],
image_refs: vec![],
}
}
#[test]
fn test_empty() {
assert_eq!(format_markdown(&[], &[], ImageMode::Placeholder), "");
}
#[test]
fn dedupe_rules_drops_edges_and_collapses_runs() {
use crate::markdown_layout::Block::{self, HorizontalRule, Paragraph};
let p = |t: &str| Paragraph {
text: t.into(),
bold: false,
italic: false,
};
let mut blocks = vec![
HorizontalRule,
p("a"),
HorizontalRule,
HorizontalRule,
p("b"),
HorizontalRule,
];
dedupe_rules(&mut blocks);
let kinds: Vec<bool> = blocks
.iter()
.map(|b| matches!(b, Block::HorizontalRule))
.collect();
assert_eq!(kinds, vec![false, true, false]);
}
#[test]
fn test_fallback_when_no_projected_lines() {
let p = ParsedPage {
page_number: 1,
page_width: 0.0,
page_height: 0.0,
text: "hello".into(),
text_items: vec![],
projected_lines: vec![],
regions: crate::types::Region::default(),
graphics: vec![],
figures: vec![],
struct_nodes: vec![],
image_refs: vec![],
};
let out = format_markdown(&[p], &[], ImageMode::Placeholder);
assert!(out.contains("```text"));
assert!(out.contains("hello"));
}
#[test]
fn test_heading_and_paragraph() {
let p = page_with(
1,
vec![
line("My Title For This Test Document", 50.0, 50.0, 18.0, 18.0),
line("First sentence of body prose here.", 50.0, 80.0, 10.0, 10.0),
line(
"Second sentence of body prose here.",
50.0,
92.0,
10.0,
10.0,
),
line(
"Third sentence of body prose here.",
50.0,
104.0,
10.0,
10.0,
),
],
);
let out = format_markdown(&[p], &[], ImageMode::Placeholder);
assert!(out.contains("# My Title For This Test Document"));
assert!(out.contains("First sentence of body prose here."));
}
#[test]
fn test_multi_page_separator() {
let a = page_with(1, vec![line("A page.", 50.0, 80.0, 10.0, 10.0)]);
let b = page_with(2, vec![line("B page.", 50.0, 80.0, 10.0, 10.0)]);
let out = format_markdown(&[a, b], &[], ImageMode::Placeholder);
assert!(out.contains("-----"));
assert!(out.find("A page.").unwrap() < out.find("B page.").unwrap());
}
}