Skip to main content

edgeparse_core/models/
list.rs

//! List structures — `PDFList`, `ListItem`, `ListLabel`, `ListBody`, plus the
//! detection-time helpers `ListInterval` and `ListItemInfo`.
2
3use serde::{Deserialize, Serialize};
4
5use super::bbox::BoundingBox;
6use super::content::ContentElement;
7use super::enums::SemanticType;
8use super::table::TableTokenRow;
9
10/// An ordered or unordered list.
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct PDFList {
13    /// Bounding box
14    pub bbox: BoundingBox,
15    /// Global index
16    pub index: Option<u32>,
17    /// Nesting level
18    pub level: Option<String>,
19    /// List items
20    pub list_items: Vec<ListItem>,
21    /// Detected numbering style (e.g., "1.", "a)", "•")
22    pub numbering_style: Option<String>,
23    /// Common prefix across items
24    pub common_prefix: Option<String>,
25    /// Previous list ID for cross-page linking
26    pub previous_list_id: Option<u64>,
27    /// Next list ID for cross-page linking
28    pub next_list_id: Option<u64>,
29}
30
31/// An entry in a PDFList.
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct ListItem {
34    /// Bounding box
35    pub bbox: BoundingBox,
36    /// Global index
37    pub index: Option<u32>,
38    /// Nesting level
39    pub level: Option<String>,
40    /// Label (bullet/number)
41    pub label: ListLabel,
42    /// Body content
43    pub body: ListBody,
44    /// Character length of the label
45    pub label_length: usize,
46    /// Processed content elements
47    pub contents: Vec<ContentElement>,
48    /// Optional semantic type
49    pub semantic_type: Option<SemanticType>,
50}
51
52/// Label part of a list item (bullet, number, etc.).
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct ListLabel {
55    /// Bounding box
56    pub bbox: BoundingBox,
57    /// Content rows
58    pub content: Vec<TableTokenRow>,
59    /// Optional semantic type
60    pub semantic_type: Option<SemanticType>,
61}
62
63/// Body part of a list item.
64#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct ListBody {
66    /// Bounding box
67    pub bbox: BoundingBox,
68    /// Content rows
69    pub content: Vec<TableTokenRow>,
70    /// Optional semantic type
71    pub semantic_type: Option<SemanticType>,
72}
73
/// Information about a sequence of list items during detection.
///
/// `list_indexes` and `list_item_infos` both describe the items of the
/// sequence: the former records which paragraphs they are, the latter what
/// was extracted from each one.
///
/// Derives `PartialEq`/`Eq` so detection results can be compared by value
/// (for example with `assert_eq!` in tests); every field is a plain std type,
/// so the comparison is structural.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListInterval {
    /// Indices of paragraphs that form list items
    pub list_indexes: Vec<usize>,
    /// Extracted info for each list item
    pub list_item_infos: Vec<ListItemInfo>,
    /// Detected numbering style
    pub numbering_style: Option<String>,
    /// Number of columns in multi-column lists
    pub number_of_columns: Option<usize>,
}

/// Information about a single list item during detection.
///
/// Derives `PartialEq`/`Eq` so two infos compare equal exactly when both the
/// label text and the sequence value match.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListItemInfo {
    /// Label text (e.g., "1.", "a)", "•")
    pub label_text: String,
    /// Numeric sequence value for ordering
    pub sequence_value: i64,
}