Skip to main content

text_document_common/parser_tools/
fragment_schema.rs

1use serde::{Deserialize, Serialize};
2
3use crate::entities::*;
4
5#[derive(Debug, Clone, Serialize, Deserialize)]
6pub struct FragmentData {
7    pub blocks: Vec<FragmentBlock>,
8    /// Table fragments extracted from cell selections. Empty for text-only fragments.
9    #[serde(default, skip_serializing_if = "Vec::is_empty")]
10    pub tables: Vec<FragmentTable>,
11}
12
13/// A table (or rectangular sub-region) captured from a cell selection.
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct FragmentTable {
16    pub rows: usize,
17    pub columns: usize,
18    pub cells: Vec<FragmentTableCell>,
19    /// Index into the parent `FragmentData::blocks` at which this table
20    /// should be inserted.  Blocks `[0..index)` come before the table,
21    /// blocks `[index..]` come after.  Default `0` for backward compat.
22    #[serde(default)]
23    pub block_insert_index: usize,
24    // ── Table-level formatting ────────────────────────────────────
25    #[serde(default, skip_serializing_if = "Option::is_none")]
26    pub fmt_border: Option<i64>,
27    #[serde(default, skip_serializing_if = "Option::is_none")]
28    pub fmt_cell_spacing: Option<i64>,
29    #[serde(default, skip_serializing_if = "Option::is_none")]
30    pub fmt_cell_padding: Option<i64>,
31    #[serde(default, skip_serializing_if = "Option::is_none")]
32    pub fmt_width: Option<i64>,
33    #[serde(default, skip_serializing_if = "Option::is_none")]
34    pub fmt_alignment: Option<Alignment>,
35    #[serde(default, skip_serializing_if = "Vec::is_empty")]
36    pub column_widths: Vec<i64>,
37}
38
39/// One cell within a [`FragmentTable`].
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct FragmentTableCell {
42    pub row: usize,
43    pub column: usize,
44    pub row_span: usize,
45    pub column_span: usize,
46    pub blocks: Vec<FragmentBlock>,
47    // ── Cell-level formatting ─────────────────────────────────────
48    #[serde(default, skip_serializing_if = "Option::is_none")]
49    pub fmt_padding: Option<i64>,
50    #[serde(default, skip_serializing_if = "Option::is_none")]
51    pub fmt_border: Option<i64>,
52    #[serde(default, skip_serializing_if = "Option::is_none")]
53    pub fmt_vertical_alignment: Option<CellVerticalAlignment>,
54    #[serde(default, skip_serializing_if = "Option::is_none")]
55    pub fmt_background_color: Option<String>,
56}
57
58#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct FragmentBlock {
60    pub plain_text: String,
61    pub elements: Vec<FragmentElement>,
62    pub heading_level: Option<i64>,
63    pub list: Option<FragmentList>,
64    pub alignment: Option<Alignment>,
65    pub indent: Option<i64>,
66    pub text_indent: Option<i64>,
67    pub marker: Option<MarkerType>,
68    pub top_margin: Option<i64>,
69    pub bottom_margin: Option<i64>,
70    pub left_margin: Option<i64>,
71    pub right_margin: Option<i64>,
72    pub tab_positions: Vec<i64>,
73    pub line_height: Option<i64>,
74    pub non_breakable_lines: Option<bool>,
75    pub direction: Option<TextDirection>,
76    pub background_color: Option<String>,
77    pub is_code_block: Option<bool>,
78    pub code_language: Option<String>,
79}
80
81#[derive(Debug, Clone, Serialize, Deserialize)]
82pub struct FragmentElement {
83    pub content: InlineContent,
84    pub fmt_font_family: Option<String>,
85    pub fmt_font_point_size: Option<i64>,
86    pub fmt_font_weight: Option<i64>,
87    pub fmt_font_bold: Option<bool>,
88    pub fmt_font_italic: Option<bool>,
89    pub fmt_font_underline: Option<bool>,
90    pub fmt_font_overline: Option<bool>,
91    pub fmt_font_strikeout: Option<bool>,
92    pub fmt_letter_spacing: Option<i64>,
93    pub fmt_word_spacing: Option<i64>,
94    pub fmt_anchor_href: Option<String>,
95    pub fmt_anchor_names: Vec<String>,
96    pub fmt_is_anchor: Option<bool>,
97    pub fmt_tooltip: Option<String>,
98    pub fmt_underline_style: Option<UnderlineStyle>,
99    pub fmt_vertical_alignment: Option<CharVerticalAlignment>,
100}
101
102#[derive(Debug, Clone, Serialize, Deserialize)]
103pub struct FragmentList {
104    pub style: ListStyle,
105    pub indent: i64,
106    pub prefix: String,
107    pub suffix: String,
108}
109
110impl FragmentElement {
111    pub fn from_entity(e: &InlineElement) -> Self {
112        FragmentElement {
113            content: e.content.clone(),
114            fmt_font_family: e.fmt_font_family.clone(),
115            fmt_font_point_size: e.fmt_font_point_size,
116            fmt_font_weight: e.fmt_font_weight,
117            fmt_font_bold: e.fmt_font_bold,
118            fmt_font_italic: e.fmt_font_italic,
119            fmt_font_underline: e.fmt_font_underline,
120            fmt_font_overline: e.fmt_font_overline,
121            fmt_font_strikeout: e.fmt_font_strikeout,
122            fmt_letter_spacing: e.fmt_letter_spacing,
123            fmt_word_spacing: e.fmt_word_spacing,
124            fmt_anchor_href: e.fmt_anchor_href.clone(),
125            fmt_anchor_names: e.fmt_anchor_names.clone(),
126            fmt_is_anchor: e.fmt_is_anchor,
127            fmt_tooltip: e.fmt_tooltip.clone(),
128            fmt_underline_style: e.fmt_underline_style.clone(),
129            fmt_vertical_alignment: e.fmt_vertical_alignment.clone(),
130        }
131    }
132
133    pub fn to_entity(&self) -> InlineElement {
134        InlineElement {
135            id: 0,
136            created_at: chrono::Utc::now(),
137            updated_at: chrono::Utc::now(),
138            content: self.content.clone(),
139            fmt_font_family: self.fmt_font_family.clone(),
140            fmt_font_point_size: self.fmt_font_point_size,
141            fmt_font_weight: self.fmt_font_weight,
142            fmt_font_bold: self.fmt_font_bold,
143            fmt_font_italic: self.fmt_font_italic,
144            fmt_font_underline: self.fmt_font_underline,
145            fmt_font_overline: self.fmt_font_overline,
146            fmt_font_strikeout: self.fmt_font_strikeout,
147            fmt_letter_spacing: self.fmt_letter_spacing,
148            fmt_word_spacing: self.fmt_word_spacing,
149            fmt_anchor_href: self.fmt_anchor_href.clone(),
150            fmt_anchor_names: self.fmt_anchor_names.clone(),
151            fmt_is_anchor: self.fmt_is_anchor,
152            fmt_tooltip: self.fmt_tooltip.clone(),
153            fmt_underline_style: self.fmt_underline_style.clone(),
154            fmt_vertical_alignment: self.fmt_vertical_alignment.clone(),
155        }
156    }
157}
158
159impl FragmentBlock {
160    /// Returns `true` when this block carries no block-level formatting,
161    /// meaning its content is purely inline.
162    pub fn is_inline_only(&self) -> bool {
163        self.heading_level.is_none()
164            && self.list.is_none()
165            && self.alignment.is_none()
166            && self.indent.unwrap_or(0) == 0
167            && self.text_indent.unwrap_or(0) == 0
168            && self.marker.is_none()
169            && self.top_margin.is_none()
170            && self.bottom_margin.is_none()
171            && self.left_margin.is_none()
172            && self.right_margin.is_none()
173            && self.line_height.is_none()
174            && self.non_breakable_lines.is_none()
175            && self.direction.is_none()
176            && self.background_color.is_none()
177            && self.is_code_block.is_none()
178            && self.code_language.is_none()
179    }
180
181    pub fn from_entity(block: &Block, elements: &[InlineElement], list: Option<&List>) -> Self {
182        FragmentBlock {
183            plain_text: block.plain_text.clone(),
184            elements: elements.iter().map(FragmentElement::from_entity).collect(),
185            heading_level: block.fmt_heading_level,
186            list: list.map(FragmentList::from_entity),
187            alignment: block.fmt_alignment.clone(),
188            indent: block.fmt_indent,
189            text_indent: block.fmt_text_indent,
190            marker: block.fmt_marker.clone(),
191            top_margin: block.fmt_top_margin,
192            bottom_margin: block.fmt_bottom_margin,
193            left_margin: block.fmt_left_margin,
194            right_margin: block.fmt_right_margin,
195            tab_positions: block.fmt_tab_positions.clone(),
196            line_height: block.fmt_line_height,
197            non_breakable_lines: block.fmt_non_breakable_lines,
198            direction: block.fmt_direction.clone(),
199            background_color: block.fmt_background_color.clone(),
200            is_code_block: block.fmt_is_code_block,
201            code_language: block.fmt_code_language.clone(),
202        }
203    }
204}
205
206impl FragmentList {
207    pub fn from_entity(list: &List) -> Self {
208        FragmentList {
209            style: list.style.clone(),
210            indent: list.indent,
211            prefix: list.prefix.clone(),
212            suffix: list.suffix.clone(),
213        }
214    }
215
216    pub fn to_entity(&self) -> List {
217        List {
218            id: 0,
219            created_at: chrono::Utc::now(),
220            updated_at: chrono::Utc::now(),
221            style: self.style.clone(),
222            indent: self.indent,
223            prefix: self.prefix.clone(),
224            suffix: self.suffix.clone(),
225        }
226    }
227}