use rpdfium_core::{Name, PdfSource};
use rpdfium_parser::{Object, ObjectStore};
use crate::error::{DocError, DocResult};
use crate::number_tree::NumberTree;
/// One page-labelling range, as produced from a page label dictionary.
///
/// A label is rendered by `format_label` as the optional `prefix` followed by
/// the optional numeric portion (`start` plus the page's offset within the
/// range, written in `style`).
#[derive(Debug, Clone)]
pub struct PageLabel {
/// Numbering style of the numeric portion. `None` means the label has no
/// numeric portion at all (only the prefix, if any, is emitted).
pub style: Option<PageLabelStyle>,
/// Text placed in front of the numeric portion, if any.
pub prefix: Option<String>,
/// Numeric value used for the first page of the range. The parser in this
/// file defaults it to 1 and never stores a value below 1.
pub start: i64,
}
/// Numbering style for the numeric portion of a page label.
///
/// Each variant corresponds to one value of the label dictionary's style
/// name, shown in parentheses below.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PageLabelStyle {
/// Decimal arabic numerals (style name `D`).
Decimal,
/// Uppercase roman numerals (style name `R`).
UpperRoman,
/// Lowercase roman numerals (style name `r`).
LowerRoman,
/// Uppercase letters (style name `A`).
UpperAlpha,
/// Lowercase letters (style name `a`).
LowerAlpha,
}
/// Reads the page label ranges referenced from the document catalog.
///
/// The catalog's page-labels entry (`Name::page_labels()`) is parsed as a
/// [`NumberTree`] whose values are converted with `convert_page_label`.
///
/// Returns the tree's entries as `(key, label)` pairs, or an empty vector when
/// the catalog has no page-labels entry.
/// NOTE(review): the key is presumably the zero-based index of the first page
/// of each range — confirm against callers.
///
/// # Errors
///
/// * `DocError::Parser` if resolving the catalog or the page-labels root fails.
/// * `DocError::UnexpectedType` if the resolved catalog is not a dictionary.
/// * Any error reported by `NumberTree::parse`.
pub fn parse_page_labels<S: PdfSource>(
    catalog: &Object,
    store: &ObjectStore<S>,
) -> DocResult<Vec<(i64, PageLabel)>> {
    let resolved_catalog = store
        .deep_resolve(catalog)
        .map_err(|err| DocError::Parser(err.to_string()))?;
    let catalog_dict = resolved_catalog
        .as_dict()
        .ok_or(DocError::UnexpectedType)?;

    // A document without a page-labels entry simply has no custom labels.
    let raw_labels = match catalog_dict.get(&Name::page_labels()) {
        None => return Ok(Vec::new()),
        Some(obj) => obj,
    };

    let labels_root = store
        .deep_resolve(raw_labels)
        .map_err(|err| DocError::Parser(err.to_string()))?;

    let tree = NumberTree::parse(labels_root, store, convert_page_label)?;
    Ok(tree.entries().to_vec())
}
/// Converts one page label dictionary into a [`PageLabel`].
///
/// Entries read from the dictionary (each is resolved through `store` first,
/// so indirect references are accepted for all three):
///
/// * `Name::s()` — style name; `D`, `R`, `r`, `A`, `a` map to the
///   corresponding [`PageLabelStyle`]. A missing, unresolvable, non-name or
///   unrecognised value yields `style: None`.
/// * `Name::p()` — prefix string, decoded lossily. Missing, unresolvable or
///   non-string values yield `prefix: None`.
/// * `Name::st()` — starting number. Missing, unresolvable, non-integer or
///   values below 1 fall back to 1.
///
/// # Errors
///
/// Returns `DocError::UnexpectedType` when `obj` is not a dictionary.
fn convert_page_label<S: PdfSource>(obj: &Object, store: &ObjectStore<S>) -> DocResult<PageLabel> {
    let dict = obj.as_dict().ok_or(DocError::UnexpectedType)?;

    let style = dict
        .get(&Name::s())
        .and_then(|o| {
            store
                .deep_resolve(o)
                .ok()
                .and_then(|r| r.as_name().map(|n| n.as_str().into_owned()))
        })
        .and_then(|s| match s.as_str() {
            "D" => Some(PageLabelStyle::Decimal),
            "R" => Some(PageLabelStyle::UpperRoman),
            "r" => Some(PageLabelStyle::LowerRoman),
            "A" => Some(PageLabelStyle::UpperAlpha),
            "a" => Some(PageLabelStyle::LowerAlpha),
            // Unknown style names are tolerated and treated as "no numbering".
            _ => None,
        });

    let prefix = dict.get(&Name::p()).and_then(|o| {
        store
            .deep_resolve(o)
            .ok()
            .and_then(|r| r.as_string().map(|s| s.to_string_lossy()))
    });

    // Resolve the start value the same way as the style and prefix above;
    // reading it directly would ignore a start number stored as an indirect
    // reference and silently fall back to 1.
    let start = dict
        .get(&Name::st())
        .and_then(|o| store.deep_resolve(o).ok().and_then(|r| r.as_i64()))
        .filter(|&n| n >= 1)
        .unwrap_or(1);

    Ok(PageLabel {
        style,
        prefix,
        start,
    })
}
pub fn format_label(label: &PageLabel, page_offset: i64) -> String {
let num = label.start + page_offset;
let mut result = String::new();
if let Some(ref prefix) = label.prefix {
result.push_str(prefix);
}
if let Some(style) = label.style {
let num_str = match style {
PageLabelStyle::Decimal => format!("{num}"),
PageLabelStyle::UpperRoman => to_roman(num, true),
PageLabelStyle::LowerRoman => to_roman(num, false),
PageLabelStyle::UpperAlpha => to_alpha(num, true),
PageLabelStyle::LowerAlpha => to_alpha(num, false),
};
result.push_str(&num_str);
}
result
}
/// Formats `num` as a roman numeral, uppercase when `upper` is true and
/// lowercase otherwise.
///
/// Returns an empty string for `num <= 0`.
///
/// Roman numerals have no compact form for large values (every thousand adds
/// another "M"), and `num` ultimately comes from the file being read. To keep
/// output size and running time bounded the value is reduced modulo
/// 1,000,000 before conversion, so the result never exceeds about a thousand
/// characters. As a consequence exact multiples of 1,000,000 produce an empty
/// string.
fn to_roman(num: i64, upper: bool) -> String {
    /// Values are reduced modulo this bound before conversion.
    const ROMAN_MODULUS: i64 = 1_000_000;
    /// Value/symbol pairs in descending order, including subtractive forms.
    const NUMERALS: [(i64, &str); 13] = [
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    ];

    if num <= 0 {
        return String::new();
    }

    let mut remaining = num % ROMAN_MODULUS;
    let mut result = String::new();
    // Greedy conversion: repeatedly emit the largest symbol that still fits.
    for &(value, symbol) in NUMERALS.iter() {
        while remaining >= value {
            result.push_str(symbol);
            remaining -= value;
        }
    }

    if upper {
        result
    } else {
        result.to_lowercase()
    }
}
/// Formats `num` as a letter sequence, uppercase when `upper` is true and
/// lowercase otherwise.
///
/// The numbering is bijective base-26: 1 → "A", 26 → "Z", 27 → "AA",
/// 28 → "AB", 52 → "AZ", 53 → "BA", and so on. Returns an empty string for
/// `num <= 0`.
///
/// NOTE(review): ISO 32000 describes letter labels as "A to Z for the first
/// 26 pages, AA to ZZ for the next 26" (repeated letters) — confirm that the
/// AA, AB, … sequence produced here is the intended behaviour.
fn to_alpha(num: i64, upper: bool) -> String {
    if num <= 0 {
        return String::new();
    }

    let first_letter = if upper { b'A' } else { b'a' };

    // Letters are produced least-significant first and reversed at the end.
    let mut letters: Vec<char> = Vec::new();
    let mut remaining = num;
    while remaining > 0 {
        // Shift to a zero-based digit; this is what makes the system
        // bijective (there is no "zero" letter).
        remaining -= 1;
        let digit = (remaining % 26) as u8;
        letters.push(char::from(first_letter + digit));
        remaining /= 26;
    }

    letters.into_iter().rev().collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a label from its three components.
    fn make_label(style: Option<PageLabelStyle>, prefix: Option<&str>, start: i64) -> PageLabel {
        PageLabel {
            style,
            prefix: prefix.map(|p| p.to_string()),
            start,
        }
    }

    /// Builds a prefix-less label of the given style that starts at 1.
    fn styled(style: PageLabelStyle) -> PageLabel {
        make_label(Some(style), None, 1)
    }

    /// Checks `format_label` against a table of `(page_offset, expected)` pairs.
    fn assert_labels(label: &PageLabel, cases: &[(i64, &str)]) {
        for &(offset, expected) in cases {
            assert_eq!(format_label(label, offset), expected);
        }
    }

    /// Finds the range covering `page` in a list sorted by first page index and
    /// returns that range's first page index together with its label.
    fn lookup(ranges: &[(i64, PageLabel)], page: i64) -> (i64, &PageLabel) {
        let idx = ranges.partition_point(|(start, _)| *start <= page) - 1;
        let (range_start, label) = &ranges[idx];
        (*range_start, label)
    }

    #[test]
    fn test_decimal_labels() {
        let label = styled(PageLabelStyle::Decimal);
        assert_labels(&label, &[(0, "1"), (4, "5"), (99, "100")]);
    }

    #[test]
    fn test_upper_roman() {
        let label = styled(PageLabelStyle::UpperRoman);
        assert_labels(&label, &[(0, "I"), (3, "IV"), (8, "IX"), (13, "XIV")]);
    }

    #[test]
    fn test_lower_roman() {
        let label = styled(PageLabelStyle::LowerRoman);
        assert_labels(&label, &[(0, "i"), (3, "iv"), (8, "ix")]);
    }

    #[test]
    fn test_upper_alpha() {
        let label = styled(PageLabelStyle::UpperAlpha);
        assert_labels(
            &label,
            &[(0, "A"), (1, "B"), (25, "Z"), (26, "AA"), (27, "AB")],
        );
    }

    #[test]
    fn test_lower_alpha() {
        let label = styled(PageLabelStyle::LowerAlpha);
        assert_labels(&label, &[(0, "a"), (25, "z"), (26, "aa")]);
    }

    #[test]
    fn test_prefix_application() {
        let label = make_label(Some(PageLabelStyle::Decimal), Some("Appendix-"), 1);
        assert_labels(&label, &[(0, "Appendix-1"), (2, "Appendix-3")]);
    }

    #[test]
    fn test_start_value_offset() {
        let label = make_label(Some(PageLabelStyle::Decimal), None, 10);
        assert_labels(&label, &[(0, "10"), (5, "15")]);
    }

    #[test]
    fn test_no_style_prefix_only() {
        let label = make_label(None, Some("Cover"), 1);
        assert_labels(&label, &[(0, "Cover")]);
    }

    #[test]
    fn test_roman_edge_cases() {
        let cases: [(i64, &str); 9] = [
            (1, "I"),
            (4, "IV"),
            (9, "IX"),
            (14, "XIV"),
            (40, "XL"),
            (90, "XC"),
            (400, "CD"),
            (900, "CM"),
            (1999, "MCMXCIX"),
        ];
        for &(value, expected) in cases.iter() {
            assert_eq!(to_roman(value, true), expected);
        }
    }

    #[test]
    fn test_alpha_edge_cases() {
        let cases: [(i64, &str); 6] = [
            (1, "A"),
            (26, "Z"),
            (27, "AA"),
            (28, "AB"),
            (52, "AZ"),
            (53, "BA"),
        ];
        for &(value, expected) in cases.iter() {
            assert_eq!(to_alpha(value, true), expected);
        }
    }

    #[test]
    fn test_multi_range_format_sequence() {
        // Front matter numbered in lowercase roman numerals...
        let roman_label = styled(PageLabelStyle::LowerRoman);
        assert_labels(&roman_label, &[(0, "i"), (1, "ii"), (2, "iii"), (3, "iv")]);

        // ...followed by a body numbered in decimal.
        let decimal_label = styled(PageLabelStyle::Decimal);
        assert_labels(&decimal_label, &[(0, "1"), (1, "2"), (5, "6")]);
    }

    #[test]
    fn test_format_label_no_style_no_prefix() {
        let label = make_label(None, None, 1);
        assert_labels(&label, &[(0, ""), (99, "")]);
    }

    #[test]
    fn test_roman_zero_returns_empty() {
        for &upper in [true, false].iter() {
            assert_eq!(to_roman(0, upper), "");
        }
    }

    #[test]
    fn test_alpha_zero_returns_empty() {
        for &upper in [true, false].iter() {
            assert_eq!(to_alpha(0, upper), "");
        }
    }

    #[test]
    fn test_page_label_get_label_perf() {
        let ranges: Vec<(i64, PageLabel)> = vec![
            (0, make_label(Some(PageLabelStyle::UpperRoman), None, 1)),
            (100, make_label(Some(PageLabelStyle::UpperAlpha), Some("abc"), 5)),
            (900, make_label(Some(PageLabelStyle::Decimal), None, 999)),
            (3000, make_label(Some(PageLabelStyle::LowerRoman), None, 1)),
            (5000, make_label(Some(PageLabelStyle::LowerAlpha), None, 1)),
            (8000, make_label(None, Some("x"), 1)),
        ];

        // Every page covered by a range with a style or prefix must get a
        // non-empty label.
        for page_index in 0..=10_000i64 {
            let (range_start, label) = lookup(&ranges, page_index);
            let result = format_label(label, page_index - range_start);
            if label.style.is_some() || label.prefix.is_some() {
                assert!(!result.is_empty(), "page {page_index} produced empty label");
            }
        }

        // Spot checks at and around the range boundaries.
        let expected: [(i64, &str); 11] = [
            (0, "I"),
            (1, "II"),
            (37, "XXXVIII"),
            (99, "C"),
            (100, "abcE"),
            (900, "999"),
            (901, "1000"),
            (3000, "i"),
            (5000, "a"),
            (8000, "x"),
            (10000, "x"),
        ];
        for &(page, want) in expected.iter() {
            let (range_start, label) = lookup(&ranges, page);
            assert_eq!(format_label(label, page - range_start), want);
        }
    }
}