use crate::model::{Block, Document, Paragraph};
/// Settings that steer how paragraphs are classified as headings.
#[derive(Debug, Clone)]
pub struct HeadingConfig {
    /// Upper bound applied to every heading level the analyzer emits.
    pub max_heading_level: u8,
    /// Paragraphs whose trimmed text has more characters than this are never
    /// treated as headings.
    pub max_text_length: usize,
    /// When `true`, a paragraph carrying an explicit heading style is accepted
    /// as a heading before any statistical inference is attempted.
    pub trust_explicit_styles: bool,
    /// When `true`, runs of consecutively numbered paragraphs are detected and
    /// any headings inside them are demoted.
    pub analyze_sequences: bool,
    /// Minimum length a detected run must have before it is demoted.
    pub min_sequence_count: usize,
    /// When `true`, font-size statistics are collected and used to infer
    /// headings.
    pub enable_statistical_inference: bool,
    /// Factor by which a paragraph's font size must reach the base font size
    /// to be considered for inference.
    pub size_threshold_ratio: f32,
    /// When `true`, heading levels are shifted so the smallest level in use
    /// does not exceed `normalize_min_level`.
    pub normalize_levels: bool,
    /// Target level for the smallest heading level when normalizing.
    pub normalize_min_level: u8,
}
impl Default for HeadingConfig {
fn default() -> Self {
Self {
max_heading_level: 4,
max_text_length: 80,
trust_explicit_styles: true,
analyze_sequences: true,
min_sequence_count: 2,
enable_statistical_inference: true, size_threshold_ratio: 1.15, normalize_levels: true, normalize_min_level: 2, }
}
}
impl HeadingConfig {
    /// Creates a configuration holding the default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the maximum heading level, clamped to `1..=6`.
    pub fn with_max_level(self, level: u8) -> Self {
        Self {
            max_heading_level: level.clamp(1, 6),
            ..self
        }
    }

    /// Sets the maximum number of characters a heading may contain.
    pub fn with_max_text_length(self, length: usize) -> Self {
        Self {
            max_text_length: length,
            ..self
        }
    }

    /// Chooses whether explicit heading styles are accepted directly.
    pub fn with_trust_explicit(self, trust: bool) -> Self {
        Self {
            trust_explicit_styles: trust,
            ..self
        }
    }

    /// Turns numbered-run detection on or off.
    pub fn with_sequence_analysis(self, analyze: bool) -> Self {
        Self {
            analyze_sequences: analyze,
            ..self
        }
    }

    /// Sets the minimum run length that triggers demotion; values below 2 are
    /// raised to 2.
    pub fn with_min_sequence_count(self, count: usize) -> Self {
        Self {
            min_sequence_count: count.max(2),
            ..self
        }
    }

    /// Turns font-size based inference on or off.
    pub fn with_statistical_inference(self, enable: bool) -> Self {
        Self {
            enable_statistical_inference: enable,
            ..self
        }
    }

    /// Sets the font-size ratio threshold; values below 1.0 are raised to 1.0.
    pub fn with_size_ratio(self, ratio: f32) -> Self {
        Self {
            size_threshold_ratio: ratio.max(1.0),
            ..self
        }
    }

    /// Turns heading-level normalization on or off.
    pub fn with_normalize_levels(self, enable: bool) -> Self {
        Self {
            normalize_levels: enable,
            ..self
        }
    }

    /// Sets the normalization target level, clamped to `1..=3`.
    pub fn with_normalize_min_level(self, level: u8) -> Self {
        Self {
            normalize_min_level: level.clamp(1, 3),
            ..self
        }
    }
}
/// Outcome of classifying a single paragraph.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HeadingDecision {
    /// Heading whose level comes from the paragraph's own heading style.
    Explicit(u8),
    /// Heading whose level was derived from font statistics.
    Inferred(u8),
    /// The paragraph carried a heading style, or was first classified as a
    /// heading, but was rejected.
    Demoted,
    /// The paragraph is not a heading.
    None,
}
impl HeadingDecision {
    /// Returns `true` for the `Explicit` and `Inferred` variants, i.e. exactly
    /// the variants that carry a level.
    pub fn is_heading(&self) -> bool {
        self.level().is_some()
    }

    /// Returns the heading level carried by `Explicit` or `Inferred`, and
    /// `None` for every other variant.
    pub fn level(&self) -> Option<u8> {
        match *self {
            HeadingDecision::Explicit(level) => Some(level),
            HeadingDecision::Inferred(level) => Some(level),
            HeadingDecision::Demoted | HeadingDecision::None => None,
        }
    }
}
/// Aggregate font and paragraph statistics for one document.
#[derive(Debug, Clone, Default)]
pub struct DocumentStats {
    /// Accumulated weight per font size, keyed by the size in tenths of a
    /// unit (the base size is recovered as `key / 10.0`).
    pub font_sizes: std::collections::HashMap<u32, usize>,
    /// Font size with the greatest accumulated weight, once calculated.
    pub base_font_size: Option<f32>,
    /// Number of paragraphs counted as entirely bold.
    pub bold_paragraphs: usize,
    /// Total number of paragraphs counted.
    pub total_paragraphs: usize,
    /// Number of paragraphs that carried an explicit heading style.
    pub explicit_heading_count: usize,
}

impl DocumentStats {
    /// Recomputes `base_font_size` as the font size with the largest weight in
    /// `font_sizes`; the result is `None` when the map is empty.
    ///
    /// When several sizes share the largest weight, the smallest size wins.
    /// `HashMap` iteration order is unspecified, so without an explicit
    /// tie-break the result could differ between runs on the same document.
    pub fn calculate_base_font_size(&mut self) {
        self.base_font_size = self
            .font_sizes
            .iter()
            .max_by_key(|&(&size, &weight)| (weight, std::cmp::Reverse(size)))
            .map(|(&size, _)| size as f32 / 10.0);
    }

    /// Returns `true` when `size` is at least `ratio` times the base font size.
    ///
    /// Always returns `false` while no base font size is known.
    pub fn is_larger_than_base(&self, size: f32, ratio: f32) -> bool {
        self.base_font_size
            .is_some_and(|base| size >= base * ratio)
    }
}
/// Classifies paragraphs as headings using a configuration plus the font
/// statistics gathered from the document being analyzed.
pub struct HeadingAnalyzer {
    /// Settings supplied at construction time.
    config: HeadingConfig,
    /// Statistics from the most recent collection pass; starts out empty.
    stats: DocumentStats,
}
impl HeadingAnalyzer {
    /// Creates an analyzer with the given configuration and empty statistics.
    pub fn new(config: HeadingConfig) -> Self {
        Self {
            config,
            stats: DocumentStats::default(),
        }
    }

    /// Creates an analyzer that uses `HeadingConfig::default()`.
    pub fn with_defaults() -> Self {
        Self::new(HeadingConfig::default())
    }

    /// Analyzes every paragraph of `doc` and returns one decision per
    /// paragraph, in document order.
    ///
    /// Non-paragraph blocks are skipped. Font statistics are gathered first
    /// when statistical inference is enabled, and heading levels are
    /// normalized last when normalization is enabled.
    pub fn analyze(&mut self, doc: &Document) -> Vec<HeadingDecision> {
        let paragraphs: Vec<&Paragraph> = doc
            .sections
            .iter()
            .flat_map(|section| section.content.iter())
            .filter_map(|block| match block {
                Block::Paragraph(para) => Some(para),
                _ => None,
            })
            .collect();

        if self.config.enable_statistical_inference {
            self.collect_stats(&paragraphs);
        }

        let mut decisions = self.analyze_paragraphs(&paragraphs);
        if self.config.normalize_levels {
            self.normalize_heading_levels(&mut decisions);
        }
        decisions
    }

    /// Shifts all heading levels up so that the smallest level in use equals
    /// `normalize_min_level`.
    ///
    /// Nothing changes when there are no headings or when the smallest level
    /// is already at or above the target. Shifted levels never drop below 1.
    fn normalize_heading_levels(&self, decisions: &mut [HeadingDecision]) {
        let Some(min_level) = decisions.iter().filter_map(|d| d.level()).min() else {
            return;
        };
        let target = self.config.normalize_min_level;
        if min_level <= target {
            return;
        }

        let shift = min_level - target;
        let shifted = |level: u8| level.saturating_sub(shift).max(1);
        for decision in decisions.iter_mut() {
            *decision = match *decision {
                HeadingDecision::Explicit(level) => HeadingDecision::Explicit(shifted(level)),
                HeadingDecision::Inferred(level) => HeadingDecision::Inferred(shifted(level)),
                other => other,
            };
        }
    }

    /// Rebuilds the document statistics from scratch for `paragraphs`.
    ///
    /// Each font size is weighted by the character count of the paragraphs
    /// that use it, so long body paragraphs dominate the base font size.
    fn collect_stats(&mut self, paragraphs: &[&Paragraph]) {
        let mut stats = DocumentStats::default();
        for para in paragraphs {
            stats.total_paragraphs += 1;
            if para.style.heading_level > 0 {
                stats.explicit_heading_count += 1;
            }
            if let Some(size) = para.dominant_font_size() {
                // Sizes are bucketed to tenths so they can be used as map keys.
                let key = (size * 10.0) as u32;
                let text_len = para.plain_text().chars().count();
                *stats.font_sizes.entry(key).or_insert(0) += text_len;
            }
            if para.is_all_bold() {
                stats.bold_paragraphs += 1;
            }
        }
        stats.calculate_base_font_size();
        self.stats = stats;
    }

    /// Classifies each paragraph and, when sequence analysis is enabled,
    /// demotes headings that belong to a numbered run.
    ///
    /// This uses whatever statistics the analyzer currently holds; while no
    /// base font size is known, statistical inference never produces a
    /// heading.
    pub fn analyze_paragraphs(&self, paragraphs: &[&Paragraph]) -> Vec<HeadingDecision> {
        let mut decisions: Vec<HeadingDecision> = paragraphs
            .iter()
            .map(|para| self.decide_heading(para))
            .collect();
        if self.config.analyze_sequences {
            self.apply_sequence_analysis(paragraphs, &mut decisions);
        }
        decisions
    }

    /// Decides the heading status of a single paragraph in isolation.
    ///
    /// Order of checks: bullet-like or over-long text is rejected first; then
    /// a trusted explicit style wins; then statistical inference; finally an
    /// untrusted explicit style is accepted unless the text looks like a
    /// caption.
    fn decide_heading(&self, para: &Paragraph) -> HeadingDecision {
        let plain_text = para.plain_text();
        let trimmed = plain_text.trim();
        let style = &para.style;
        let has_heading_style = style.heading_level > 0;

        // A rejected paragraph is `Demoted` only if it claimed to be a heading.
        let rejected = if has_heading_style {
            HeadingDecision::Demoted
        } else {
            HeadingDecision::None
        };

        if self.looks_like_bullet_item(trimmed) {
            return rejected;
        }
        if trimmed.chars().count() > self.config.max_text_length {
            return rejected;
        }

        if has_heading_style && self.config.trust_explicit_styles {
            return HeadingDecision::Explicit(self.cap_heading_level(style.heading_level));
        }

        if self.config.enable_statistical_inference {
            if let Some(inferred) = self.infer_heading_from_style(para) {
                return HeadingDecision::Inferred(inferred);
            }
        }

        // Bullet-like and over-long text already returned above, so only the
        // caption check remains for an untrusted explicit style.
        if has_heading_style && !self.looks_like_caption(trimmed) {
            return HeadingDecision::Explicit(self.cap_heading_level(style.heading_level));
        }

        HeadingDecision::None
    }

    /// Infers a heading level from the paragraph's dominant font size.
    ///
    /// Returns `None` for empty text, when no dominant font size is
    /// available, when the size does not reach the required ratio over the
    /// base font size, or when the text exceeds `max_text_length`.
    /// Center-aligned paragraphs must reach a ratio of at least 1.5.
    fn infer_heading_from_style(&self, para: &Paragraph) -> Option<u8> {
        use crate::model::Alignment;

        let text = para.plain_text();
        let trimmed = text.trim();
        if trimmed.is_empty() {
            return None;
        }

        let font_size = para.dominant_font_size()?;
        let effective_ratio = if para.style.alignment == Alignment::Center {
            self.config.size_threshold_ratio.max(1.5)
        } else {
            self.config.size_threshold_ratio
        };
        if !self.stats.is_larger_than_base(font_size, effective_ratio) {
            return None;
        }
        if trimmed.chars().count() > self.config.max_text_length {
            return None;
        }

        let level = self.infer_level_from_size(font_size, para.is_all_bold());
        Some(self.cap_heading_level(level))
    }

    /// Maps a font size to a heading level via its ratio to the base size:
    /// `>= 1.8` gives 1, `>= 1.5` gives 2, `>= 1.3` gives 3, otherwise 4.
    ///
    /// Bold text is promoted by one level, but never above level 1. A base
    /// size of 12.0 is assumed when none has been calculated.
    fn infer_level_from_size(&self, size: f32, is_bold: bool) -> u8 {
        let base = self.stats.base_font_size.unwrap_or(12.0);
        let ratio = size / base;
        let base_level = if ratio >= 1.8 {
            1
        } else if ratio >= 1.5 {
            2
        } else if ratio >= 1.3 {
            3
        } else {
            4
        };
        if is_bold && base_level > 1 {
            base_level - 1
        } else {
            base_level
        }
    }

    /// Returns `true` when the first non-whitespace character of `text` is one
    /// of the known bullet markers.
    fn looks_like_bullet_item(&self, text: &str) -> bool {
        const BULLET_MARKERS: &[char] = &[
            'ㅇ', 'ㆍ', '○', '●', '◎', '■', '□', '▪', '▫', '◆', '◇', '★', '☆', '※', '•', '-', '–',
            '—', '→', '▶', '►', '▷', '▹', '◁', '◀', '◃', '◂', '·', '∙', '*', '◦', '◼', '◾',
        ];
        text.trim_start()
            .chars()
            .next()
            .is_some_and(|first| BULLET_MARKERS.contains(&first))
    }

    /// Returns `true` when `text`, ignoring leading whitespace, starts with a
    /// bracketed figure or table caption prefix.
    fn looks_like_caption(&self, text: &str) -> bool {
        const CAPTION_PREFIXES: &[&str] = &[
            "[그림", "[표", "[Figure", "[Table", "[Fig.", "[그림]", "[표]",
        ];
        let trimmed = text.trim_start();
        if trimmed.is_empty() {
            return false;
        }
        CAPTION_PREFIXES
            .iter()
            .any(|prefix| trimmed.starts_with(prefix))
    }

    /// Limits `level` to the configured `max_heading_level`.
    fn cap_heading_level(&self, level: u8) -> u8 {
        level.min(self.config.max_heading_level)
    }

    /// Demotes every heading that lies inside a run of consecutively numbered
    /// paragraphs at least `min_sequence_count` long.
    fn apply_sequence_analysis(
        &self,
        paragraphs: &[&Paragraph],
        decisions: &mut [HeadingDecision],
    ) {
        if paragraphs.len() < self.config.min_sequence_count {
            return;
        }

        let mut i = 0;
        while i < paragraphs.len() {
            match self.detect_sequence_at(paragraphs, i) {
                Some(seq_len) if seq_len >= self.config.min_sequence_count => {
                    for decision in decisions.iter_mut().skip(i).take(seq_len) {
                        if decision.is_heading() {
                            *decision = HeadingDecision::Demoted;
                        }
                    }
                    // Resume after the run so its members are not re-examined.
                    i += seq_len;
                }
                _ => i += 1,
            }
        }
    }

    /// Returns the length of the numbered run beginning at `start`, or `None`
    /// when fewer than two consecutive markers are found there.
    ///
    /// A run continues only while each following paragraph carries exactly the
    /// marker that succeeds the previous one.
    fn detect_sequence_at(&self, paragraphs: &[&Paragraph], start: usize) -> Option<usize> {
        let first_text = paragraphs.get(start)?.plain_text();
        let first_marker = extract_sequence_marker(first_text.trim())?;

        let mut seq_len = 1;
        let mut expected_next = next_marker(&first_marker)?;
        for para in paragraphs.iter().skip(start + 1) {
            let text = para.plain_text();
            match extract_sequence_marker(text.trim()) {
                Some(marker) if marker == expected_next => {
                    seq_len += 1;
                    match next_marker(&marker) {
                        Some(next) => expected_next = next,
                        // The marker has no successor, so the run ends here.
                        None => break,
                    }
                }
                _ => break,
            }
        }

        (seq_len >= 2).then_some(seq_len)
    }

    /// Returns the configuration this analyzer was built with.
    pub fn config(&self) -> &HeadingConfig {
        &self.config
    }
}
/// Extracts a list-style sequence marker from the start of `text`.
///
/// Only the first ten characters (after leading whitespace) are inspected.
/// Recognized forms, tried in this order:
/// - `(N)`, `(a)`, `(가)`: digits, one ASCII lowercase letter, or one Korean
///   sequence character inside parentheses;
/// - `N.` / `N)`: leading ASCII digits followed by `.` or `)`;
/// - `가.` / `가)`: a Korean sequence character followed by `.` or `)`;
/// - `a.` / `a)`: an ASCII lowercase letter followed by `.` or `)`.
///
/// Returns the marker without its punctuation, or `None` when nothing matches.
fn extract_sequence_marker(text: &str) -> Option<String> {
    let head: Vec<char> = text.trim_start().chars().take(10).collect();
    let first = *head.first()?;

    // Parenthesized marker such as "(3)" or "(가)".
    if first == '(' {
        if let Some(close) = head.iter().position(|&c| c == ')') {
            let inner = &head[1..close];
            let numeric = !inner.is_empty() && inner.iter().all(|c| c.is_ascii_digit());
            let single_letter = match inner {
                [c] => c.is_ascii_lowercase() || is_korean_sequence_char(*c),
                _ => false,
            };
            if numeric || single_letter {
                return Some(inner.iter().collect());
            }
        }
    }

    // Leading digits terminated by '.' or ')'.
    let digit_count = head.iter().take_while(|c| c.is_ascii_digit()).count();
    if digit_count > 0 {
        if let Some(&sep) = head.get(digit_count) {
            if sep == '.' || sep == ')' {
                return Some(head[..digit_count].iter().collect());
            }
        }
    }

    // Single Korean or ASCII lowercase letter terminated by '.' or ')'.
    if let [lead, sep, ..] = head.as_slice() {
        let terminated = *sep == '.' || *sep == ')';
        if terminated && (is_korean_sequence_char(*lead) || lead.is_ascii_lowercase()) {
            return Some(lead.to_string());
        }
    }

    None
}
/// Returns the marker that follows `marker` in its sequence.
///
/// - Numeric markers are incremented; `None` is returned if the increment
///   would overflow `u32`.
/// - A single Korean sequence character advances along 가, 나, 다 … 하; the
///   last one (`하`) has no successor.
/// - A single ASCII lowercase letter advances alphabetically; `z` has no
///   successor.
///
/// Any other input yields `None`.
fn next_marker(marker: &str) -> Option<String> {
    const KOREAN_SEQ: &[char] = &[
        '가', '나', '다', '라', '마', '바', '사', '아', '자', '차', '카', '타', '파', '하',
    ];

    if let Ok(n) = marker.parse::<u32>() {
        // `checked_add` avoids an overflow panic (debug) or wrap-around
        // (release) for `u32::MAX`.
        return n.checked_add(1).map(|next| next.to_string());
    }

    // Everything else must be exactly one character long.
    let mut chars = marker.chars();
    let (Some(c), None) = (chars.next(), chars.next()) else {
        return None;
    };

    if let Some(idx) = KOREAN_SEQ.iter().position(|&x| x == c) {
        return KOREAN_SEQ.get(idx + 1).map(|next| next.to_string());
    }

    if c.is_ascii_lowercase() && c != 'z' {
        return Some(char::from(c as u8 + 1).to_string());
    }

    None
}
/// Returns `true` when `c` is one of the fourteen Korean syllables used as
/// list markers (가, 나, 다 … 하).
fn is_korean_sequence_char(c: char) -> bool {
    matches!(
        c,
        '가' | '나' | '다' | '라' | '마' | '바' | '사' | '아' | '자' | '차' | '카' | '타' | '파' | '하'
    )
}
/// Unicode Roman numeral characters for one through ten, in order; the index
/// plus one is the numeral's value.
const ROMAN_NUMERALS: &[char] = &['Ⅰ', 'Ⅱ', 'Ⅲ', 'Ⅳ', 'Ⅴ', 'Ⅵ', 'Ⅶ', 'Ⅷ', 'Ⅸ', 'Ⅹ'];

/// Parses `text` as a Korean chapter-style heading.
///
/// Two shapes are recognized after trimming:
/// - `제<digits><suffix>[title]`, where the suffix is one of 장, 절, 조, 항,
///   편 or 부;
/// - a single Unicode Roman numeral (Ⅰ–Ⅹ), either alone or followed by `.`,
///   `-` or whitespace, then an optional title.
///
/// Returns the parsed chapter information, or `None` when neither shape
/// matches.
pub fn is_korean_chapter_pattern(text: &str) -> Option<KoreanChapterInfo> {
    let chars: Vec<char> = text.trim().chars().collect();
    let first = *chars.first()?;

    // Shape 1: "제" + digits + suffix.
    if first == '제' && chars.len() >= 3 {
        let digit_count = chars[1..]
            .iter()
            .take_while(|c| c.is_ascii_digit())
            .count();
        let suffix_idx = 1 + digit_count;
        if digit_count > 0 && suffix_idx < chars.len() {
            let chapter_type = match chars[suffix_idx] {
                '장' => Some(KoreanChapterType::Jang),
                '절' => Some(KoreanChapterType::Jeol),
                '조' => Some(KoreanChapterType::Jo),
                '항' => Some(KoreanChapterType::Hang),
                '편' => Some(KoreanChapterType::Pyeon),
                '부' => Some(KoreanChapterType::Bu),
                _ => None,
            };
            if let Some(ct) = chapter_type {
                let digits: String = chars[1..suffix_idx].iter().collect();
                if let Ok(number) = digits.parse::<u32>() {
                    let title_start = suffix_idx + 1;
                    let title = (title_start < chars.len()).then(|| {
                        chars[title_start..]
                            .iter()
                            .collect::<String>()
                            .trim()
                            .to_string()
                    });
                    return Some(KoreanChapterInfo {
                        chapter_type: ct,
                        number,
                        title,
                    });
                }
            }
        }
    }

    // Shape 2: leading Roman numeral character.
    let roman_idx = ROMAN_NUMERALS.iter().position(|&c| c == first)?;
    let stands_alone_or_separated = match chars.get(1) {
        Some(&c) => c == '.' || c == '-' || c.is_whitespace(),
        // No second character: the numeral is the whole text.
        None => true,
    };
    if !stands_alone_or_separated {
        return None;
    }

    let title = (chars.len() > 2)
        .then(|| chars[2..].iter().collect::<String>().trim().to_string());
    Some(KoreanChapterInfo {
        chapter_type: KoreanChapterType::Roman,
        number: (roman_idx + 1) as u32,
        title,
    })
}
/// Kind of structural unit named by a Korean chapter-style heading.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum KoreanChapterType {
    /// Suffix `장`.
    Jang,
    /// Suffix `절`.
    Jeol,
    /// Suffix `조`.
    Jo,
    /// Suffix `항`.
    Hang,
    /// Suffix `편`.
    Pyeon,
    /// Suffix `부`.
    Bu,
    /// Heading introduced by a Unicode Roman numeral character.
    Roman,
}
impl KoreanChapterType {
pub fn suggested_heading_level(&self) -> u8 {
match self {
KoreanChapterType::Pyeon | KoreanChapterType::Bu => 1,
KoreanChapterType::Jang | KoreanChapterType::Roman => 2,
KoreanChapterType::Jeol => 3,
KoreanChapterType::Jo | KoreanChapterType::Hang => 4,
}
}
}
/// Parsed form of a Korean chapter-style heading.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct KoreanChapterInfo {
    /// Kind of structural unit the heading names.
    pub chapter_type: KoreanChapterType,
    /// Ordinal parsed from the heading.
    pub number: u32,
    /// Text after the chapter marker, if any.
    pub title: Option<String>,
}
/// Returns the chapter expected to follow `info`: same chapter type, number
/// incremented by one, and no title.
///
/// Returns `None` when the number cannot be incremented without overflowing
/// `u32`, instead of panicking (debug) or wrapping to zero (release).
pub fn next_korean_chapter(info: &KoreanChapterInfo) -> Option<KoreanChapterInfo> {
    let number = info.number.checked_add(1)?;
    Some(KoreanChapterInfo {
        chapter_type: info.chapter_type,
        number,
        title: None,
    })
}
/// Returns `true` when the trimmed `text` matches a Korean chapter pattern and
/// is at most 60 characters long.
pub fn looks_like_korean_heading(text: &str) -> bool {
    let candidate = text.trim();
    match is_korean_chapter_pattern(candidate) {
        Some(_) => candidate.chars().count() <= 60,
        None => false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::{
        Block, Document, InlineContent, ParagraphStyle, Section, TextRun, TextStyle,
    };

    /// Builds a paragraph with the given heading level and a single plain run.
    fn make_paragraph(text: &str, heading_level: u8) -> Paragraph {
        let mut para = Paragraph::with_style(ParagraphStyle {
            heading_level,
            ..Default::default()
        });
        para.content.push(InlineContent::Text(TextRun::new(text)));
        para
    }

    /// Builds a paragraph with the given heading level and a single styled run.
    fn make_styled_paragraph(text: &str, heading_level: u8, style: TextStyle) -> Paragraph {
        let mut para = Paragraph::with_style(ParagraphStyle {
            heading_level,
            ..Default::default()
        });
        para.content
            .push(InlineContent::Text(TextRun::with_style(text, style)));
        para
    }

    #[test]
    fn test_explicit_heading_trusted() {
        let config = HeadingConfig::default();
        let analyzer = HeadingAnalyzer::new(config);
        let para = make_paragraph("제목", 1);
        let paras = vec![&para];
        let decisions = analyzer.analyze_paragraphs(&paras);
        assert!(matches!(decisions[0], HeadingDecision::Explicit(1)));
    }

    #[test]
    fn test_bullet_marker_demoted_when_untrusted() {
        let config = HeadingConfig::default().with_trust_explicit(false);
        let analyzer = HeadingAnalyzer::new(config);
        let para = make_paragraph("ㅇ 항목 내용", 2);
        let paras = vec![&para];
        let decisions = analyzer.analyze_paragraphs(&paras);
        assert_eq!(decisions[0], HeadingDecision::Demoted);
    }

    #[test]
    fn test_sequence_marker_extraction() {
        assert_eq!(extract_sequence_marker("1. 항목"), Some("1".to_string()));
        assert_eq!(extract_sequence_marker("2) 항목"), Some("2".to_string()));
        assert_eq!(extract_sequence_marker("(3) 항목"), Some("3".to_string()));
        assert_eq!(extract_sequence_marker("가. 항목"), Some("가".to_string()));
        assert_eq!(extract_sequence_marker("a. 항목"), Some("a".to_string()));
        assert_eq!(extract_sequence_marker("일반 텍스트"), None);
    }

    #[test]
    fn test_next_marker() {
        assert_eq!(next_marker("1"), Some("2".to_string()));
        assert_eq!(next_marker("9"), Some("10".to_string()));
        assert_eq!(next_marker("가"), Some("나".to_string()));
        assert_eq!(next_marker("a"), Some("b".to_string()));
        assert_eq!(next_marker("하"), None);
    }

    #[test]
    fn test_sequence_analysis_demotes_consecutive() {
        let config = HeadingConfig::default().with_trust_explicit(false);
        let analyzer = HeadingAnalyzer::new(config);
        let para1 = make_paragraph("1. 첫째", 2);
        let para2 = make_paragraph("2. 둘째", 2);
        let para3 = make_paragraph("3. 셋째", 2);
        let paras = vec![&para1, &para2, &para3];
        let decisions = analyzer.analyze_paragraphs(&paras);
        assert!(decisions
            .iter()
            .all(|d| matches!(d, HeadingDecision::Demoted)));
    }

    #[test]
    fn test_standalone_numbered_heading_preserved() {
        let config = HeadingConfig::default().with_trust_explicit(false);
        let analyzer = HeadingAnalyzer::new(config);
        let para1 = make_paragraph("1. 서론", 2);
        let para2 = make_paragraph("본문 내용입니다.", 0);
        let para3 = make_paragraph("2. 본론", 2);
        let paras = vec![&para1, &para2, &para3];
        let decisions = analyzer.analyze_paragraphs(&paras);
        assert!(
            matches!(decisions[0], HeadingDecision::Explicit(2)),
            "First heading should be preserved: {:?}",
            decisions[0]
        );
        assert!(
            matches!(decisions[2], HeadingDecision::Explicit(2)),
            "Third heading should be preserved: {:?}",
            decisions[2]
        );
    }

    #[test]
    fn test_long_text_demoted() {
        let config = HeadingConfig::default().with_trust_explicit(false);
        let analyzer = HeadingAnalyzer::new(config);
        let long_text = "이것은 매우 긴 텍스트입니다. ".repeat(5);
        let para = make_paragraph(&long_text, 2);
        let paras = vec![&para];
        let decisions = analyzer.analyze_paragraphs(&paras);
        assert_eq!(decisions[0], HeadingDecision::Demoted);
    }

    #[test]
    fn test_korean_sequence_patterns() {
        let config = HeadingConfig::default().with_trust_explicit(false);
        let analyzer = HeadingAnalyzer::new(config);
        let para1 = make_paragraph("가. 첫째", 2);
        let para2 = make_paragraph("나. 둘째", 2);
        let para3 = make_paragraph("다. 셋째", 2);
        let paras = vec![&para1, &para2, &para3];
        let decisions = analyzer.analyze_paragraphs(&paras);
        assert!(decisions
            .iter()
            .all(|d| matches!(d, HeadingDecision::Demoted)));
    }

    #[test]
    fn test_max_heading_level_capped() {
        let config = HeadingConfig::default().with_max_level(2);
        let analyzer = HeadingAnalyzer::new(config);
        let para = make_paragraph("제목", 4);
        let paras = vec![&para];
        let decisions = analyzer.analyze_paragraphs(&paras);
        assert!(matches!(decisions[0], HeadingDecision::Explicit(2)));
    }

    #[test]
    fn test_heading_decision_helpers() {
        assert!(HeadingDecision::Explicit(1).is_heading());
        assert!(HeadingDecision::Inferred(2).is_heading());
        assert!(!HeadingDecision::Demoted.is_heading());
        assert!(!HeadingDecision::None.is_heading());
        assert_eq!(HeadingDecision::Explicit(3).level(), Some(3));
        assert_eq!(HeadingDecision::Demoted.level(), None);
    }

    #[test]
    fn test_korean_chapter_pattern_jang() {
        let info = is_korean_chapter_pattern("제1장 서론").unwrap();
        assert_eq!(info.chapter_type, KoreanChapterType::Jang);
        assert_eq!(info.number, 1);
        assert_eq!(info.title, Some("서론".to_string()));
        let info2 = is_korean_chapter_pattern("제2장").unwrap();
        assert_eq!(info2.chapter_type, KoreanChapterType::Jang);
        assert_eq!(info2.number, 2);
        assert_eq!(info2.title, None);
    }

    #[test]
    fn test_korean_chapter_pattern_jeol() {
        let info = is_korean_chapter_pattern("제1절 개요").unwrap();
        assert_eq!(info.chapter_type, KoreanChapterType::Jeol);
        assert_eq!(info.number, 1);
        assert_eq!(info.title, Some("개요".to_string()));
    }

    #[test]
    fn test_korean_chapter_pattern_jo() {
        let info = is_korean_chapter_pattern("제15조 적용범위").unwrap();
        assert_eq!(info.chapter_type, KoreanChapterType::Jo);
        assert_eq!(info.number, 15);
    }

    #[test]
    fn test_korean_chapter_pattern_roman() {
        let info = is_korean_chapter_pattern("Ⅰ. 서론").unwrap();
        assert_eq!(info.chapter_type, KoreanChapterType::Roman);
        assert_eq!(info.number, 1);
        assert_eq!(info.title, Some("서론".to_string()));
        let info2 = is_korean_chapter_pattern("Ⅲ- 결론").unwrap();
        assert_eq!(info2.chapter_type, KoreanChapterType::Roman);
        assert_eq!(info2.number, 3);
    }

    #[test]
    fn test_korean_chapter_pattern_none() {
        assert!(is_korean_chapter_pattern("일반 텍스트").is_none());
        assert!(is_korean_chapter_pattern("1. 항목").is_none());
        assert!(is_korean_chapter_pattern("가. 내용").is_none());
    }

    #[test]
    fn test_korean_chapter_suggested_level() {
        assert_eq!(KoreanChapterType::Jang.suggested_heading_level(), 2);
        assert_eq!(KoreanChapterType::Jeol.suggested_heading_level(), 3);
        assert_eq!(KoreanChapterType::Jo.suggested_heading_level(), 4);
        assert_eq!(KoreanChapterType::Roman.suggested_heading_level(), 2);
        assert_eq!(KoreanChapterType::Pyeon.suggested_heading_level(), 1);
    }

    #[test]
    fn test_looks_like_korean_heading() {
        assert!(looks_like_korean_heading("제1장 서론"));
        assert!(looks_like_korean_heading("Ⅱ. 본론"));
        assert!(!looks_like_korean_heading("1. 첫 번째 항목"));
        assert!(!looks_like_korean_heading("가. 내용"));
    }

    #[test]
    fn test_next_korean_chapter() {
        let info = KoreanChapterInfo {
            chapter_type: KoreanChapterType::Jang,
            number: 1,
            title: Some("서론".to_string()),
        };
        let next = next_korean_chapter(&info).unwrap();
        assert_eq!(next.chapter_type, KoreanChapterType::Jang);
        assert_eq!(next.number, 2);
        assert_eq!(next.title, None);
    }

    #[test]
    fn test_statistical_inference_bold_large_font() {
        let config = HeadingConfig::default()
            .with_statistical_inference(true)
            .with_trust_explicit(false);
        let mut analyzer = HeadingAnalyzer::new(config);
        let body1 = make_styled_paragraph(
            "This is body text that should establish the baseline.",
            0,
            TextStyle {
                font_size: Some(12.0),
                ..Default::default()
            },
        );
        let body2 = make_styled_paragraph(
            "More body text to strengthen the baseline determination.",
            0,
            TextStyle {
                font_size: Some(12.0),
                ..Default::default()
            },
        );
        let heading = make_styled_paragraph(
            "Chapter Title",
            0,
            TextStyle {
                bold: true,
                font_size: Some(16.0),
                ..Default::default()
            },
        );
        let doc = Document {
            sections: vec![Section {
                content: vec![
                    Block::Paragraph(body1),
                    Block::Paragraph(heading),
                    Block::Paragraph(body2),
                ],
                ..Default::default()
            }],
            ..Default::default()
        };
        let decisions = analyzer.analyze(&doc);
        assert!(
            matches!(decisions[1], HeadingDecision::Inferred(_)),
            "Bold + large font should be inferred as heading: {:?}",
            decisions[1]
        );
    }

    #[test]
    fn test_statistical_inference_large_font_without_bold() {
        let config = HeadingConfig::default()
            .with_statistical_inference(true)
            .with_trust_explicit(false);
        let mut analyzer = HeadingAnalyzer::new(config);
        let body = make_styled_paragraph(
            "Body text establishes baseline.",
            0,
            TextStyle {
                font_size: Some(12.0),
                ..Default::default()
            },
        );
        let large_not_bold = make_styled_paragraph(
            "Large Title",
            0,
            TextStyle {
                bold: false,
                font_size: Some(16.0),
                ..Default::default()
            },
        );
        let doc = Document {
            sections: vec![Section {
                content: vec![Block::Paragraph(body), Block::Paragraph(large_not_bold)],
                ..Default::default()
            }],
            ..Default::default()
        };
        let decisions = analyzer.analyze(&doc);
        assert!(
            matches!(decisions[1], HeadingDecision::Inferred(_)),
            "Large font should be heading even without bold: {:?}",
            decisions[1]
        );
    }

    #[test]
    fn test_statistical_inference_enabled_by_default() {
        let config = HeadingConfig::default();
        let mut analyzer = HeadingAnalyzer::new(config);
        let body = make_styled_paragraph(
            "This is body text at normal size.",
            0,
            TextStyle {
                font_size: Some(12.0),
                ..Default::default()
            },
        );
        let heading = make_styled_paragraph(
            "Bold Large Title",
            0,
            TextStyle {
                bold: true,
                font_size: Some(20.0),
                ..Default::default()
            },
        );
        let doc = Document {
            sections: vec![Section {
                content: vec![Block::Paragraph(body), Block::Paragraph(heading)],
                ..Default::default()
            }],
            ..Default::default()
        };
        let decisions = analyzer.analyze(&doc);
        assert!(
            matches!(decisions[1], HeadingDecision::Inferred(_)),
            "Statistical inference should be enabled by default: {:?}",
            decisions[1]
        );
    }

    #[test]
    fn test_document_stats_base_font_calculation() {
        let mut stats = DocumentStats::default();
        stats.font_sizes.insert(120, 500);
        stats.font_sizes.insert(160, 50);
        stats.font_sizes.insert(100, 100);
        stats.calculate_base_font_size();
        assert_eq!(stats.base_font_size, Some(12.0));
    }

    #[test]
    fn test_is_larger_than_base() {
        let stats = DocumentStats {
            base_font_size: Some(12.0),
            ..Default::default()
        };
        assert!(stats.is_larger_than_base(14.5, 1.2));
        assert!(stats.is_larger_than_base(16.0, 1.2));
        assert!(!stats.is_larger_than_base(12.0, 1.2));
        assert!(!stats.is_larger_than_base(14.0, 1.2));
    }

    /// Builds a center-aligned paragraph with a single styled run.
    fn make_center_styled_paragraph(text: &str, heading_level: u8, style: TextStyle) -> Paragraph {
        use crate::model::Alignment;
        let mut para = Paragraph::with_style(ParagraphStyle {
            heading_level,
            alignment: Alignment::Center,
            ..Default::default()
        });
        para.content
            .push(InlineContent::Text(TextRun::with_style(text, style)));
        para
    }

    #[test]
    fn test_center_aligned_moderate_font_not_inferred() {
        let config = HeadingConfig::default()
            .with_statistical_inference(true)
            .with_trust_explicit(false);
        let mut analyzer = HeadingAnalyzer::new(config);
        let body = make_styled_paragraph(
            "본문 텍스트입니다. 기준 폰트 크기를 설정하기 위한 텍스트.",
            0,
            TextStyle {
                font_size: Some(12.0),
                ..Default::default()
            },
        );
        let signature = make_center_styled_paragraph(
            "스마트제조혁신추진단장",
            0,
            TextStyle {
                bold: true,
                font_size: Some(16.0),
                ..Default::default()
            },
        );
        let doc = Document {
            sections: vec![Section {
                content: vec![Block::Paragraph(body), Block::Paragraph(signature)],
                ..Default::default()
            }],
            ..Default::default()
        };
        let decisions = analyzer.analyze(&doc);
        assert!(
            !decisions[1].is_heading(),
            "Center-aligned moderate-font text should NOT be heading: {:?}",
            decisions[1]
        );
    }

    #[test]
    fn test_center_aligned_very_large_font_inferred() {
        let config = HeadingConfig::default()
            .with_statistical_inference(true)
            .with_trust_explicit(false);
        let mut analyzer = HeadingAnalyzer::new(config);
        let body = make_styled_paragraph(
            "본문 텍스트입니다. 기준 폰트 크기를 설정하기 위한 텍스트.",
            0,
            TextStyle {
                font_size: Some(12.0),
                ..Default::default()
            },
        );
        let title = make_center_styled_paragraph(
            "문서 제목",
            0,
            TextStyle {
                bold: true,
                font_size: Some(20.0),
                ..Default::default()
            },
        );
        let doc = Document {
            sections: vec![Section {
                content: vec![Block::Paragraph(body), Block::Paragraph(title)],
                ..Default::default()
            }],
            ..Default::default()
        };
        let decisions = analyzer.analyze(&doc);
        assert!(
            decisions[1].is_heading(),
            "Center-aligned very-large-font text SHOULD be heading: {:?}",
            decisions[1]
        );
    }

    #[test]
    fn test_left_aligned_moderate_font_still_heading() {
        let config = HeadingConfig::default()
            .with_statistical_inference(true)
            .with_trust_explicit(false);
        let mut analyzer = HeadingAnalyzer::new(config);
        let body = make_styled_paragraph(
            "본문 텍스트입니다. 기준 폰트 크기를 설정하기 위한 텍스트.",
            0,
            TextStyle {
                font_size: Some(12.0),
                ..Default::default()
            },
        );
        let heading = make_styled_paragraph(
            "섹션 제목",
            0,
            TextStyle {
                bold: true,
                font_size: Some(16.0),
                ..Default::default()
            },
        );
        let doc = Document {
            sections: vec![Section {
                content: vec![Block::Paragraph(body), Block::Paragraph(heading)],
                ..Default::default()
            }],
            ..Default::default()
        };
        let decisions = analyzer.analyze(&doc);
        assert!(
            decisions[1].is_heading(),
            "Left-aligned moderate-font text should still be heading: {:?}",
            decisions[1]
        );
    }
}