Skip to main content

text_document_common/parser_tools/
fragment_schema.rs

1use serde::{Deserialize, Serialize};
2
3use crate::entities::*;
4
5#[derive(Debug, Clone, Serialize, Deserialize)]
6pub struct FragmentData {
7    pub blocks: Vec<FragmentBlock>,
8    /// Table fragments extracted from cell selections. Empty for text-only fragments.
9    #[serde(default, skip_serializing_if = "Vec::is_empty")]
10    pub tables: Vec<FragmentTable>,
11}
12
13/// A table (or rectangular sub-region) captured from a cell selection.
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct FragmentTable {
16    pub rows: usize,
17    pub columns: usize,
18    pub cells: Vec<FragmentTableCell>,
19}
20
21/// One cell within a [`FragmentTable`].
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct FragmentTableCell {
24    pub row: usize,
25    pub column: usize,
26    pub row_span: usize,
27    pub column_span: usize,
28    pub blocks: Vec<FragmentBlock>,
29}
30
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct FragmentBlock {
33    pub plain_text: String,
34    pub elements: Vec<FragmentElement>,
35    pub heading_level: Option<i64>,
36    pub list: Option<FragmentList>,
37    pub alignment: Option<Alignment>,
38    pub indent: Option<i64>,
39    pub text_indent: Option<i64>,
40    pub marker: Option<MarkerType>,
41    pub top_margin: Option<i64>,
42    pub bottom_margin: Option<i64>,
43    pub left_margin: Option<i64>,
44    pub right_margin: Option<i64>,
45    pub tab_positions: Vec<i64>,
46    pub line_height: Option<i64>,
47    pub non_breakable_lines: Option<bool>,
48    pub direction: Option<TextDirection>,
49    pub background_color: Option<String>,
50    pub is_code_block: Option<bool>,
51    pub code_language: Option<String>,
52}
53
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct FragmentElement {
56    pub content: InlineContent,
57    pub fmt_font_family: Option<String>,
58    pub fmt_font_point_size: Option<i64>,
59    pub fmt_font_weight: Option<i64>,
60    pub fmt_font_bold: Option<bool>,
61    pub fmt_font_italic: Option<bool>,
62    pub fmt_font_underline: Option<bool>,
63    pub fmt_font_overline: Option<bool>,
64    pub fmt_font_strikeout: Option<bool>,
65    pub fmt_letter_spacing: Option<i64>,
66    pub fmt_word_spacing: Option<i64>,
67    pub fmt_anchor_href: Option<String>,
68    pub fmt_anchor_names: Vec<String>,
69    pub fmt_is_anchor: Option<bool>,
70    pub fmt_tooltip: Option<String>,
71    pub fmt_underline_style: Option<UnderlineStyle>,
72    pub fmt_vertical_alignment: Option<CharVerticalAlignment>,
73}
74
75#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct FragmentList {
77    pub style: ListStyle,
78    pub indent: i64,
79    pub prefix: String,
80    pub suffix: String,
81}
82
83impl FragmentElement {
84    pub fn from_entity(e: &InlineElement) -> Self {
85        FragmentElement {
86            content: e.content.clone(),
87            fmt_font_family: e.fmt_font_family.clone(),
88            fmt_font_point_size: e.fmt_font_point_size,
89            fmt_font_weight: e.fmt_font_weight,
90            fmt_font_bold: e.fmt_font_bold,
91            fmt_font_italic: e.fmt_font_italic,
92            fmt_font_underline: e.fmt_font_underline,
93            fmt_font_overline: e.fmt_font_overline,
94            fmt_font_strikeout: e.fmt_font_strikeout,
95            fmt_letter_spacing: e.fmt_letter_spacing,
96            fmt_word_spacing: e.fmt_word_spacing,
97            fmt_anchor_href: e.fmt_anchor_href.clone(),
98            fmt_anchor_names: e.fmt_anchor_names.clone(),
99            fmt_is_anchor: e.fmt_is_anchor,
100            fmt_tooltip: e.fmt_tooltip.clone(),
101            fmt_underline_style: e.fmt_underline_style.clone(),
102            fmt_vertical_alignment: e.fmt_vertical_alignment.clone(),
103        }
104    }
105
106    pub fn to_entity(&self) -> InlineElement {
107        InlineElement {
108            id: 0,
109            created_at: chrono::Utc::now(),
110            updated_at: chrono::Utc::now(),
111            content: self.content.clone(),
112            fmt_font_family: self.fmt_font_family.clone(),
113            fmt_font_point_size: self.fmt_font_point_size,
114            fmt_font_weight: self.fmt_font_weight,
115            fmt_font_bold: self.fmt_font_bold,
116            fmt_font_italic: self.fmt_font_italic,
117            fmt_font_underline: self.fmt_font_underline,
118            fmt_font_overline: self.fmt_font_overline,
119            fmt_font_strikeout: self.fmt_font_strikeout,
120            fmt_letter_spacing: self.fmt_letter_spacing,
121            fmt_word_spacing: self.fmt_word_spacing,
122            fmt_anchor_href: self.fmt_anchor_href.clone(),
123            fmt_anchor_names: self.fmt_anchor_names.clone(),
124            fmt_is_anchor: self.fmt_is_anchor,
125            fmt_tooltip: self.fmt_tooltip.clone(),
126            fmt_underline_style: self.fmt_underline_style.clone(),
127            fmt_vertical_alignment: self.fmt_vertical_alignment.clone(),
128        }
129    }
130}
131
132impl FragmentBlock {
133    /// Returns `true` when this block carries no block-level formatting,
134    /// meaning its content is purely inline.
135    pub fn is_inline_only(&self) -> bool {
136        self.heading_level.is_none()
137            && self.list.is_none()
138            && self.alignment.is_none()
139            && self.indent.unwrap_or(0) == 0
140            && self.text_indent.unwrap_or(0) == 0
141            && self.marker.is_none()
142            && self.top_margin.is_none()
143            && self.bottom_margin.is_none()
144            && self.left_margin.is_none()
145            && self.right_margin.is_none()
146            && self.line_height.is_none()
147            && self.non_breakable_lines.is_none()
148            && self.direction.is_none()
149            && self.background_color.is_none()
150            && self.is_code_block.is_none()
151            && self.code_language.is_none()
152    }
153
154    pub fn from_entity(block: &Block, elements: &[InlineElement], list: Option<&List>) -> Self {
155        FragmentBlock {
156            plain_text: block.plain_text.clone(),
157            elements: elements.iter().map(FragmentElement::from_entity).collect(),
158            heading_level: block.fmt_heading_level,
159            list: list.map(FragmentList::from_entity),
160            alignment: block.fmt_alignment.clone(),
161            indent: block.fmt_indent,
162            text_indent: block.fmt_text_indent,
163            marker: block.fmt_marker.clone(),
164            top_margin: block.fmt_top_margin,
165            bottom_margin: block.fmt_bottom_margin,
166            left_margin: block.fmt_left_margin,
167            right_margin: block.fmt_right_margin,
168            tab_positions: block.fmt_tab_positions.clone(),
169            line_height: block.fmt_line_height,
170            non_breakable_lines: block.fmt_non_breakable_lines,
171            direction: block.fmt_direction.clone(),
172            background_color: block.fmt_background_color.clone(),
173            is_code_block: block.fmt_is_code_block,
174            code_language: block.fmt_code_language.clone(),
175        }
176    }
177}
178
179impl FragmentList {
180    pub fn from_entity(list: &List) -> Self {
181        FragmentList {
182            style: list.style.clone(),
183            indent: list.indent,
184            prefix: list.prefix.clone(),
185            suffix: list.suffix.clone(),
186        }
187    }
188
189    pub fn to_entity(&self) -> List {
190        List {
191            id: 0,
192            created_at: chrono::Utc::now(),
193            updated_at: chrono::Utc::now(),
194            style: self.style.clone(),
195            indent: self.indent,
196            prefix: self.prefix.clone(),
197            suffix: self.suffix.clone(),
198        }
199    }
200}