1use rpdfium_core::{Name, PdfSource};
7use rpdfium_parser::{Object, ObjectStore};
8
9use crate::error::{DocError, DocResult};
10use crate::number_tree::NumberTree;
11
12#[derive(Debug, Clone)]
14pub struct PageLabel {
15 pub style: Option<PageLabelStyle>,
17 pub prefix: Option<String>,
19 pub start: i64,
21}
22
/// Numbering styles that a page label's numeric part can use.
///
/// The parser in this file maps the style names `D`, `R`, `r`, `A` and `a`
/// onto these variants, in declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PageLabelStyle {
    /// Decimal digits (style name `D`): `1`, `2`, `3`, ...
    Decimal,
    /// Uppercase Roman numerals (style name `R`): `I`, `II`, `III`, ...
    UpperRoman,
    /// Lowercase Roman numerals (style name `r`): `i`, `ii`, `iii`, ...
    LowerRoman,
    /// Uppercase letters (style name `A`): `A`, `B`, ..., `Z`, `AA`, ...
    UpperAlpha,
    /// Lowercase letters (style name `a`): `a`, `b`, ..., `z`, `aa`, ...
    LowerAlpha,
}
37
38pub fn parse_page_labels<S: PdfSource>(
42 catalog: &Object,
43 store: &ObjectStore<S>,
44) -> DocResult<Vec<(i64, PageLabel)>> {
45 let catalog_dict = store
46 .deep_resolve(catalog)
47 .map_err(|e| DocError::Parser(e.to_string()))?
48 .as_dict()
49 .ok_or(DocError::UnexpectedType)?;
50
51 let labels_obj = match catalog_dict.get(&Name::page_labels()) {
52 Some(obj) => store
53 .deep_resolve(obj)
54 .map_err(|e| DocError::Parser(e.to_string()))?,
55 None => return Ok(Vec::new()),
56 };
57
58 let tree = NumberTree::parse(labels_obj, store, convert_page_label)?;
59 Ok(tree.entries().to_vec())
60}
61
62fn convert_page_label<S: PdfSource>(obj: &Object, store: &ObjectStore<S>) -> DocResult<PageLabel> {
64 let dict = obj.as_dict().ok_or(DocError::UnexpectedType)?;
65
66 let style = dict
68 .get(&Name::s())
69 .and_then(|o| {
70 store
71 .deep_resolve(o)
72 .ok()
73 .and_then(|r| r.as_name().map(|n| n.as_str().into_owned()))
74 })
75 .and_then(|s| match s.as_str() {
76 "D" => Some(PageLabelStyle::Decimal),
77 "R" => Some(PageLabelStyle::UpperRoman),
78 "r" => Some(PageLabelStyle::LowerRoman),
79 "A" => Some(PageLabelStyle::UpperAlpha),
80 "a" => Some(PageLabelStyle::LowerAlpha),
81 _ => None,
82 });
83
84 let prefix = dict.get(&Name::p()).and_then(|o| {
86 store
87 .deep_resolve(o)
88 .ok()
89 .and_then(|r| r.as_string().map(|s| s.to_string_lossy()))
90 });
91
92 let start = dict
94 .get(&Name::st())
95 .and_then(|o| o.as_i64())
96 .filter(|&n| n >= 1)
97 .unwrap_or(1);
98
99 Ok(PageLabel {
100 style,
101 prefix,
102 start,
103 })
104}
105
106pub fn format_label(label: &PageLabel, page_offset: i64) -> String {
111 let num = label.start + page_offset;
112 let mut result = String::new();
113
114 if let Some(ref prefix) = label.prefix {
115 result.push_str(prefix);
116 }
117
118 if let Some(style) = label.style {
119 let num_str = match style {
120 PageLabelStyle::Decimal => format!("{num}"),
121 PageLabelStyle::UpperRoman => to_roman(num, true),
122 PageLabelStyle::LowerRoman => to_roman(num, false),
123 PageLabelStyle::UpperAlpha => to_alpha(num, true),
124 PageLabelStyle::LowerAlpha => to_alpha(num, false),
125 };
126 result.push_str(&num_str);
127 }
128
129 result
130}
131
/// Converts `num` to a Roman numeral, uppercase when `upper` is true and
/// lowercase otherwise.
///
/// Non-positive values yield an empty string.
///
/// The greedy expansion emits one `M` per thousand, so its output length is
/// linear in the value. Because `num` can come from untrusted file data, the
/// value is first wrapped modulo 1,000,000, which bounds the output to roughly
/// a thousand characters. This follows the bound used by PDFium's page-label
/// code (to the best of the reviewer's knowledge — confirm if exact parity
/// matters). As a consequence exact multiples of 1,000,000 render as an empty
/// string.
fn to_roman(num: i64, upper: bool) -> String {
    /// Values are reduced modulo this bound before conversion.
    const WRAP_AT: i64 = 1_000_000;
    /// Numeral values in descending order, including the subtractive pairs.
    const NUMERALS: [(i64, &str); 13] = [
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    ];

    if num <= 0 {
        return String::new();
    }

    let mut remaining = num % WRAP_AT;
    let mut result = String::new();
    // Greedy: repeatedly take the largest numeral that still fits.
    for &(value, symbol) in NUMERALS.iter() {
        while remaining >= value {
            result.push_str(symbol);
            remaining -= value;
        }
    }

    if upper { result } else { result.to_lowercase() }
}
164
/// Converts `num` to a letter sequence in bijective base 26 (spreadsheet
/// column style): `1 -> A`, `26 -> Z`, `27 -> AA`, `28 -> AB`, `53 -> BA`, ...
///
/// Letters are uppercase when `upper` is true and lowercase otherwise.
/// Non-positive values yield an empty string.
///
/// NOTE(review): ISO 32000 describes the alphabetic styles as "A to Z for the
/// first 26 pages, AA to ZZ for the next 26, and so on", i.e. page 28 would be
/// `BB`, not `AB`. The behaviour here is kept as-is because the tests in this
/// file pin it — confirm which one is intended.
fn to_alpha(num: i64, upper: bool) -> String {
    if num < 1 {
        return String::new();
    }

    let first_letter = if upper { b'A' } else { b'a' };
    let mut letters = Vec::new();
    let mut remaining = num;

    // Peel off the least significant letter each round. The decrement before
    // the modulo is what makes the numbering bijective (there is no "zero"
    // letter).
    while remaining > 0 {
        remaining -= 1;
        letters.push(first_letter + (remaining % 26) as u8);
        remaining /= 26;
    }

    // Letters were produced least-significant first.
    letters.reverse();
    String::from_utf8(letters).unwrap_or_default()
}
188
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a prefix-less label with the given style and start value.
    fn plain(style: PageLabelStyle, start: i64) -> PageLabel {
        PageLabel {
            style: Some(style),
            prefix: None,
            start,
        }
    }

    /// Checks `format_label` against a table of `(page_offset, expected)`.
    fn assert_labels(label: &PageLabel, cases: &[(i64, &str)]) {
        for &(offset, expected) in cases {
            assert_eq!(format_label(label, offset), expected);
        }
    }

    /// Returns the range entry whose first page is the greatest one not
    /// exceeding `page`. `ranges` must be sorted and start at or before `page`.
    fn covering_range(ranges: &[(i64, PageLabel)], page: i64) -> &(i64, PageLabel) {
        let idx = ranges.partition_point(|(first, _)| *first <= page) - 1;
        &ranges[idx]
    }

    #[test]
    fn test_decimal_labels() {
        let label = plain(PageLabelStyle::Decimal, 1);
        assert_labels(&label, &[(0, "1"), (4, "5"), (99, "100")]);
    }

    #[test]
    fn test_upper_roman() {
        let label = plain(PageLabelStyle::UpperRoman, 1);
        assert_labels(&label, &[(0, "I"), (3, "IV"), (8, "IX"), (13, "XIV")]);
    }

    #[test]
    fn test_lower_roman() {
        let label = plain(PageLabelStyle::LowerRoman, 1);
        assert_labels(&label, &[(0, "i"), (3, "iv"), (8, "ix")]);
    }

    #[test]
    fn test_upper_alpha() {
        let label = plain(PageLabelStyle::UpperAlpha, 1);
        assert_labels(
            &label,
            &[(0, "A"), (1, "B"), (25, "Z"), (26, "AA"), (27, "AB")],
        );
    }

    #[test]
    fn test_lower_alpha() {
        let label = plain(PageLabelStyle::LowerAlpha, 1);
        assert_labels(&label, &[(0, "a"), (25, "z"), (26, "aa")]);
    }

    #[test]
    fn test_prefix_application() {
        let label = PageLabel {
            prefix: Some("Appendix-".to_string()),
            ..plain(PageLabelStyle::Decimal, 1)
        };
        assert_labels(&label, &[(0, "Appendix-1"), (2, "Appendix-3")]);
    }

    #[test]
    fn test_start_value_offset() {
        let label = plain(PageLabelStyle::Decimal, 10);
        assert_labels(&label, &[(0, "10"), (5, "15")]);
    }

    #[test]
    fn test_no_style_prefix_only() {
        let label = PageLabel {
            style: None,
            prefix: Some("Cover".to_string()),
            start: 1,
        };
        assert_labels(&label, &[(0, "Cover")]);
    }

    #[test]
    fn test_roman_edge_cases() {
        let cases: &[(i64, &str)] = &[
            (1, "I"),
            (4, "IV"),
            (9, "IX"),
            (14, "XIV"),
            (40, "XL"),
            (90, "XC"),
            (400, "CD"),
            (900, "CM"),
            (1999, "MCMXCIX"),
        ];
        for &(value, expected) in cases {
            assert_eq!(to_roman(value, true), expected);
        }
    }

    #[test]
    fn test_alpha_edge_cases() {
        let cases: &[(i64, &str)] = &[
            (1, "A"),
            (26, "Z"),
            (27, "AA"),
            (28, "AB"),
            (52, "AZ"),
            (53, "BA"),
        ];
        for &(value, expected) in cases {
            assert_eq!(to_alpha(value, true), expected);
        }
    }

    #[test]
    fn test_multi_range_format_sequence() {
        // Front matter numbered in lowercase Roman numerals.
        let roman_label = plain(PageLabelStyle::LowerRoman, 1);
        assert_labels(&roman_label, &[(0, "i"), (1, "ii"), (2, "iii"), (3, "iv")]);

        // A following range that restarts in decimal.
        let decimal_label = plain(PageLabelStyle::Decimal, 1);
        assert_labels(&decimal_label, &[(0, "1"), (1, "2"), (5, "6")]);
    }

    #[test]
    fn test_format_label_no_style_no_prefix() {
        let label = PageLabel {
            style: None,
            prefix: None,
            start: 1,
        };
        assert_labels(&label, &[(0, ""), (99, "")]);
    }

    #[test]
    fn test_roman_zero_returns_empty() {
        for &upper in &[true, false] {
            assert_eq!(to_roman(0, upper), "");
        }
    }

    #[test]
    fn test_alpha_zero_returns_empty() {
        for &upper in &[true, false] {
            assert_eq!(to_alpha(0, upper), "");
        }
    }

    #[test]
    fn test_page_label_get_label_perf() {
        // Shorthand for one `(first_page, label)` range entry.
        fn entry(
            first_page: i64,
            style: Option<PageLabelStyle>,
            prefix: Option<&str>,
            start: i64,
        ) -> (i64, PageLabel) {
            let label = PageLabel {
                style,
                prefix: prefix.map(String::from),
                start,
            };
            (first_page, label)
        }

        let ranges: Vec<(i64, PageLabel)> = vec![
            entry(0, Some(PageLabelStyle::UpperRoman), None, 1),
            entry(100, Some(PageLabelStyle::UpperAlpha), Some("abc"), 5),
            entry(900, Some(PageLabelStyle::Decimal), None, 999),
            entry(3000, Some(PageLabelStyle::LowerRoman), None, 1),
            entry(5000, Some(PageLabelStyle::LowerAlpha), None, 1),
            entry(8000, None, Some("x"), 1),
        ];

        // Every page of a 10,001-page document must resolve to a range, and
        // any range that has a style or a prefix must give a non-empty label.
        for page_index in 0..10001i64 {
            let (range_start, label) = covering_range(&ranges, page_index);
            let result = format_label(label, page_index - range_start);
            if label.style.is_some() || label.prefix.is_some() {
                assert!(!result.is_empty(), "page {page_index} produced empty label");
            }
        }

        // Spot checks at range boundaries and inside ranges.
        let expected: &[(i64, &str)] = &[
            (0, "I"),
            (1, "II"),
            (37, "XXXVIII"),
            (99, "C"),
            (100, "abcE"),
            (900, "999"),
            (901, "1000"),
            (3000, "i"),
            (5000, "a"),
            (8000, "x"),
            (10000, "x"),
        ];
        for &(page, want) in expected {
            let (range_start, label) = covering_range(&ranges, page);
            assert_eq!(format_label(label, page - range_start), want);
        }
    }
}