Skip to main content

text_document/
fragment.rs

1//! DocumentFragment — format-agnostic rich text interchange type.
2
3use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::content_parser::{ParsedElement, ParsedSpan};
5use frontend::common::parser_tools::fragment_schema::{
6    FragmentBlock, FragmentData, FragmentElement, FragmentTable, FragmentTableCell,
7};
8
/// A piece of rich text that can be inserted into a [`TextDocument`](crate::TextDocument).
///
/// `DocumentFragment` is the clipboard/interchange type. It carries
/// blocks, per-character format runs, image anchors, and structural
/// metadata in a format-agnostic internal representation.
#[derive(Debug, Clone)]
pub struct DocumentFragment {
    /// Serialized `FragmentData` (JSON produced by `serde_json`). An empty
    /// string is what [`DocumentFragment::new`] stores for an empty fragment.
    data: String,
    /// Plain-text rendering kept alongside `data`; handed out by `to_plain_text`.
    plain_text: String,
}
19
20impl DocumentFragment {
21    /// Create an empty fragment.
22    pub fn new() -> Self {
23        Self {
24            data: String::new(),
25            plain_text: String::new(),
26        }
27    }
28
29    /// Create a fragment from plain text.
30    ///
31    /// Builds valid fragment data so the fragment can be inserted via
32    /// [`TextCursor::insert_fragment`](crate::TextCursor::insert_fragment).
33    pub fn from_plain_text(text: &str) -> Self {
34        let blocks: Vec<FragmentBlock> = text
35            .split('\n')
36            .map(|line| FragmentBlock {
37                plain_text: line.to_string(),
38                elements: vec![FragmentElement {
39                    content: InlineContent::Text(line.to_string()),
40                    fmt_font_family: None,
41                    fmt_font_point_size: None,
42                    fmt_font_weight: None,
43                    fmt_font_bold: None,
44                    fmt_font_italic: None,
45                    fmt_font_underline: None,
46                    fmt_font_overline: None,
47                    fmt_font_strikeout: None,
48                    fmt_letter_spacing: None,
49                    fmt_word_spacing: None,
50                    fmt_anchor_href: None,
51                    fmt_anchor_names: vec![],
52                    fmt_is_anchor: None,
53                    fmt_tooltip: None,
54                    fmt_underline_style: None,
55                    fmt_vertical_alignment: None,
56                }],
57                heading_level: None,
58                list: None,
59                alignment: None,
60                indent: None,
61                text_indent: None,
62                marker: None,
63                top_margin: None,
64                bottom_margin: None,
65                left_margin: None,
66                right_margin: None,
67                tab_positions: vec![],
68                line_height: None,
69                non_breakable_lines: None,
70                direction: None,
71                background_color: None,
72                is_code_block: None,
73                code_language: None,
74                hyphenate: None,
75                language: None,
76            })
77            .collect();
78
79        let data = serde_json::to_string(&FragmentData {
80            blocks,
81            tables: vec![],
82        })
83        .expect("fragment serialization should not fail");
84
85        Self {
86            data,
87            plain_text: text.to_string(),
88        }
89    }
90
91    /// Create a fragment from HTML.
92    pub fn from_html(html: &str) -> Self {
93        let parsed = frontend::common::parser_tools::content_parser::parse_html_elements(html);
94        parsed_elements_to_fragment(parsed)
95    }
96
97    /// Create a fragment from Markdown.
98    pub fn from_markdown(markdown: &str) -> Self {
99        let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
100        parsed_elements_to_fragment(parsed)
101    }
102
103    /// Create a fragment from an entire document.
104    pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
105        let inner = doc.inner.lock();
106        // Use i64::MAX as anchor to ensure the full document is captured.
107        // Document positions include inter-block gaps, so character_count
108        // alone would truncate the last block.
109        let dto = frontend::document_inspection::ExtractFragmentDto {
110            position: 0,
111            anchor: i64::MAX,
112        };
113        let result =
114            frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
115        Ok(Self::from_raw(result.fragment_data, result.plain_text))
116    }
117
118    /// Create a fragment from the serialized internal format.
119    pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
120        Self { data, plain_text }
121    }
122
123    /// Export the fragment as plain text.
124    pub fn to_plain_text(&self) -> &str {
125        &self.plain_text
126    }
127
128    /// Export the fragment as HTML.
129    pub fn to_html(&self) -> String {
130        if self.data.is_empty() {
131            return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
132        }
133
134        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
135            Ok(d) => d,
136            Err(_) => {
137                return String::from(
138                    "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
139                );
140            }
141        };
142
143        let mut body = String::new();
144        let blocks = &fragment_data.blocks;
145
146        // Single inline-only block with no tables: emit inline HTML without block wrapper
147        if blocks.len() == 1 && blocks[0].is_inline_only() && fragment_data.tables.is_empty() {
148            push_inline_html(&mut body, &blocks[0].elements);
149            return format!(
150                "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
151                body
152            );
153        }
154
155        // Sort tables by block_insert_index so we can interleave them
156        let mut sorted_tables: Vec<&FragmentTable> = fragment_data.tables.iter().collect();
157        sorted_tables.sort_by_key(|t| t.block_insert_index);
158        let mut table_cursor = 0;
159
160        let mut i = 0;
161
162        while i < blocks.len() {
163            // Insert any tables whose block_insert_index == i
164            while table_cursor < sorted_tables.len()
165                && sorted_tables[table_cursor].block_insert_index <= i
166            {
167                push_table_html(&mut body, sorted_tables[table_cursor]);
168                table_cursor += 1;
169            }
170
171            let block = &blocks[i];
172
173            if let Some(ref list) = block.list {
174                let is_ordered = is_ordered_list_style(&list.style);
175                let list_tag = if is_ordered { "ol" } else { "ul" };
176                body.push('<');
177                body.push_str(list_tag);
178                body.push('>');
179
180                while i < blocks.len() {
181                    let b = &blocks[i];
182                    match &b.list {
183                        Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
184                            body.push_str("<li>");
185                            push_inline_html(&mut body, &b.elements);
186                            body.push_str("</li>");
187                            i += 1;
188                        }
189                        _ => break,
190                    }
191                }
192
193                body.push_str("</");
194                body.push_str(list_tag);
195                body.push('>');
196            } else if let Some(level) = block.heading_level {
197                let n = level.clamp(1, 6);
198                body.push_str(&format!("<h{}>", n));
199                push_inline_html(&mut body, &block.elements);
200                body.push_str(&format!("</h{}>", n));
201                i += 1;
202            } else {
203                // Emit block-level formatting as inline styles (ISSUE-19)
204                let style = block_style_attr(block);
205                if style.is_empty() {
206                    body.push_str("<p>");
207                } else {
208                    body.push_str(&format!("<p style=\"{}\">", style));
209                }
210                push_inline_html(&mut body, &block.elements);
211                body.push_str("</p>");
212                i += 1;
213            }
214        }
215
216        // Emit any remaining tables after all blocks
217        while table_cursor < sorted_tables.len() {
218            push_table_html(&mut body, sorted_tables[table_cursor]);
219            table_cursor += 1;
220        }
221
222        format!(
223            "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
224            body
225        )
226    }
227
228    /// Export the fragment as Markdown.
229    pub fn to_markdown(&self) -> String {
230        if self.data.is_empty() {
231            return String::new();
232        }
233
234        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
235            Ok(d) => d,
236            Err(_) => return String::new(),
237        };
238
239        // (rendered_text, is_list_item) — used for join logic
240        let mut parts: Vec<(String, bool)> = Vec::new();
241        let mut prev_was_list = false;
242        let mut list_counter: u32 = 0;
243
244        // Sort tables by block_insert_index for interleaving
245        let mut sorted_tables: Vec<&FragmentTable> = fragment_data.tables.iter().collect();
246        sorted_tables.sort_by_key(|t| t.block_insert_index);
247        let mut table_cursor = 0;
248
249        for (blk_idx, block) in fragment_data.blocks.iter().enumerate() {
250            // Insert tables before this block index
251            while table_cursor < sorted_tables.len()
252                && sorted_tables[table_cursor].block_insert_index <= blk_idx
253            {
254                parts.push((render_table_markdown(sorted_tables[table_cursor]), false));
255                prev_was_list = false;
256                list_counter = 0;
257                table_cursor += 1;
258            }
259
260            let inline_text = render_inline_markdown(&block.elements);
261            let is_list = block.list.is_some();
262
263            let indent_prefix = match block.indent {
264                Some(n) if n > 0 => "  ".repeat(n as usize),
265                _ => String::new(),
266            };
267
268            if let Some(level) = block.heading_level {
269                let n = level.clamp(1, 6) as usize;
270                let prefix = "#".repeat(n);
271                parts.push((format!("{} {}", prefix, inline_text), false));
272                prev_was_list = false;
273                list_counter = 0;
274            } else if let Some(ref list) = block.list {
275                let is_ordered = is_ordered_list_style(&list.style);
276                if !prev_was_list {
277                    list_counter = 0;
278                }
279                if is_ordered {
280                    list_counter += 1;
281                    parts.push((
282                        format!("{}{}. {}", indent_prefix, list_counter, inline_text),
283                        true,
284                    ));
285                } else {
286                    parts.push((format!("{}- {}", indent_prefix, inline_text), true));
287                }
288                prev_was_list = true;
289            } else {
290                if indent_prefix.is_empty() {
291                    parts.push((inline_text, false));
292                } else {
293                    parts.push((format!("{}{}", indent_prefix, inline_text), false));
294                }
295                prev_was_list = false;
296                list_counter = 0;
297            }
298
299            if !is_list {
300                prev_was_list = false;
301            }
302        }
303
304        // Emit remaining tables after all blocks
305        while table_cursor < sorted_tables.len() {
306            parts.push((render_table_markdown(sorted_tables[table_cursor]), false));
307            table_cursor += 1;
308        }
309
310        // Join: list items with \n, others with \n\n
311        let mut result = String::new();
312        for (idx, (text, is_list)) in parts.iter().enumerate() {
313            if idx > 0 {
314                let (_, prev_is_list) = &parts[idx - 1];
315                if *prev_is_list && *is_list {
316                    result.push('\n');
317                } else {
318                    result.push_str("\n\n");
319                }
320            }
321            result.push_str(text);
322        }
323
324        result
325    }
326
    /// Returns true if the fragment's plain text is empty.
    ///
    /// Only the plain-text rendering is inspected; the serialized data is not
    /// parsed.
    // NOTE(review): a fragment whose data holds tables but no blocks presumably
    // has empty plain text and would report empty here — confirm callers
    // expect that.
    pub fn is_empty(&self) -> bool {
        self.plain_text.is_empty()
    }
331
332    /// Returns the serialized internal representation.
333    pub(crate) fn raw_data(&self) -> &str {
334        &self.data
335    }
336}
337
338impl Default for DocumentFragment {
339    fn default() -> Self {
340        Self::new()
341    }
342}
343
344// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
345// Shared helpers (used by both to_html and to_markdown)
346// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
347
348fn is_ordered_list_style(style: &ListStyle) -> bool {
349    matches!(
350        style,
351        ListStyle::Decimal
352            | ListStyle::LowerAlpha
353            | ListStyle::UpperAlpha
354            | ListStyle::LowerRoman
355            | ListStyle::UpperRoman
356    )
357}
358
359// ── HTML helpers ────────────────────────────────────────────────
360
/// Escape text for use in HTML text content or a double-quoted attribute value.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by character references; every
/// other character is copied through unchanged, except carriage return (see
/// the comment in the body).
fn escape_html(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let reference = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            '\'' => "&#x27;",
            // A literal CR is normalised to LF by the HTML5 input preprocessor
            // when the markup is parsed again, whereas a CR written as a
            // numeric reference survives. Emitting the reference keeps the
            // serialiser idempotent across a round trip.
            '\r' => "&#13;",
            other => {
                escaped.push(other);
                continue;
            }
        };
        escaped.push_str(reference);
    }
    escaped
}
380
381/// Build a CSS `style` attribute value from block-level formatting (ISSUE-19).
382fn block_style_attr(block: &FragmentBlock) -> String {
383    use crate::Alignment;
384
385    let mut parts = Vec::new();
386    if let Some(ref alignment) = block.alignment {
387        let value = match alignment {
388            Alignment::Left => "left",
389            Alignment::Right => "right",
390            Alignment::Center => "center",
391            Alignment::Justify => "justify",
392        };
393        parts.push(format!("text-align: {}", value));
394    }
395    if let Some(n) = block.indent
396        && n > 0
397    {
398        parts.push(format!("margin-left: {}em", n));
399    }
400    if let Some(px) = block.text_indent
401        && px != 0
402    {
403        parts.push(format!("text-indent: {}px", px));
404    }
405    if let Some(px) = block.top_margin {
406        parts.push(format!("margin-top: {}px", px));
407    }
408    if let Some(px) = block.bottom_margin {
409        parts.push(format!("margin-bottom: {}px", px));
410    }
411    if let Some(px) = block.left_margin {
412        parts.push(format!("margin-left: {}px", px));
413    }
414    if let Some(px) = block.right_margin {
415        parts.push(format!("margin-right: {}px", px));
416    }
417    parts.join("; ")
418}
419
420fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
421    for elem in elements {
422        let text = match &elem.content {
423            InlineContent::Text(t) => escape_html(t),
424            InlineContent::Image {
425                name,
426                width,
427                height,
428                ..
429            } => {
430                format!(
431                    "<img src=\"{}\" width=\"{}\" height=\"{}\">",
432                    escape_html(name),
433                    width,
434                    height
435                )
436            }
437            InlineContent::Empty => String::new(),
438        };
439
440        let is_monospace = elem
441            .fmt_font_family
442            .as_deref()
443            .is_some_and(|f| f == "monospace");
444        let is_bold = elem.fmt_font_bold.unwrap_or(false);
445        let is_italic = elem.fmt_font_italic.unwrap_or(false);
446        let is_underline = elem.fmt_font_underline.unwrap_or(false);
447        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
448        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
449
450        let mut result = text;
451
452        if is_monospace {
453            result = format!("<code>{}</code>", result);
454        }
455        if is_bold {
456            result = format!("<strong>{}</strong>", result);
457        }
458        if is_italic {
459            result = format!("<em>{}</em>", result);
460        }
461        if is_underline {
462            result = format!("<u>{}</u>", result);
463        }
464        if is_strikeout {
465            result = format!("<s>{}</s>", result);
466        }
467        if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
468            result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
469        }
470
471        out.push_str(&result);
472    }
473}
474
475/// Emit an HTML `<table>` for a `FragmentTable`.
476fn push_table_html(out: &mut String, table: &FragmentTable) {
477    out.push_str("<table>");
478    for row in 0..table.rows {
479        out.push_str("<tr>");
480        for col in 0..table.columns {
481            if let Some(cell) = table.cells.iter().find(|c| c.row == row && c.column == col) {
482                out.push_str("<td");
483                if cell.row_span > 1 {
484                    out.push_str(&format!(" rowspan=\"{}\"", cell.row_span));
485                }
486                if cell.column_span > 1 {
487                    out.push_str(&format!(" colspan=\"{}\"", cell.column_span));
488                }
489                out.push('>');
490                for (i, block) in cell.blocks.iter().enumerate() {
491                    if i > 0 {
492                        out.push_str("<br>");
493                    }
494                    push_inline_html(out, &block.elements);
495                }
496                out.push_str("</td>");
497            }
498            // Skip positions covered by spans — the HTML renderer handles them.
499        }
500        out.push_str("</tr>");
501    }
502    out.push_str("</table>");
503}
504
505// ── Markdown helpers ────────────────────────────────────────────
506
/// Backslash-escape every character that Markdown may treat as syntax.
///
/// Each character listed in `SPECIAL` is prefixed with `\`; all others are
/// copied through unchanged.
fn escape_markdown(s: &str) -> String {
    const SPECIAL: &[char] = &[
        '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!', '|', '~',
        '<', '>',
    ];

    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if SPECIAL.contains(&ch) {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
537
538fn render_inline_markdown(elements: &[FragmentElement]) -> String {
539    let mut out = String::new();
540    for elem in elements {
541        let raw_text = match &elem.content {
542            InlineContent::Text(t) => t.clone(),
543            InlineContent::Image { name, .. } => format!("![{}]({})", name, name),
544            InlineContent::Empty => String::new(),
545        };
546
547        let is_monospace = elem
548            .fmt_font_family
549            .as_deref()
550            .is_some_and(|f| f == "monospace");
551        let is_bold = elem.fmt_font_bold.unwrap_or(false);
552        let is_italic = elem.fmt_font_italic.unwrap_or(false);
553        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
554        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
555
556        if is_monospace {
557            out.push('`');
558            out.push_str(&raw_text);
559            out.push('`');
560        } else {
561            let mut text = escape_markdown(&raw_text);
562            if is_bold && is_italic {
563                text = format!("***{}***", text);
564            } else if is_bold {
565                text = format!("**{}**", text);
566            } else if is_italic {
567                text = format!("*{}*", text);
568            }
569            if is_strikeout {
570                text = format!("~~{}~~", text);
571            }
572            if is_anchor {
573                let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
574                out.push_str(&format!("[{}]({})", text, href));
575            } else {
576                out.push_str(&text);
577            }
578        }
579    }
580    out
581}
582
583/// Render a `FragmentTable` as a pipe-delimited Markdown table.
584fn render_table_markdown(table: &FragmentTable) -> String {
585    let mut rows: Vec<Vec<String>> = vec![vec![String::new(); table.columns]; table.rows];
586
587    for cell in &table.cells {
588        let text: String = cell
589            .blocks
590            .iter()
591            .map(|b| render_inline_markdown(&b.elements))
592            .collect::<Vec<_>>()
593            .join(" ");
594        if cell.row < table.rows && cell.column < table.columns {
595            rows[cell.row][cell.column] = text;
596        }
597    }
598
599    let mut out = String::new();
600    for (i, row) in rows.iter().enumerate() {
601        out.push_str("| ");
602        out.push_str(&row.join(" | "));
603        out.push_str(" |");
604        if i == 0 {
605            // Header separator
606            out.push('\n');
607            out.push('|');
608            for _ in 0..table.columns {
609                out.push_str(" --- |");
610            }
611        }
612        if i + 1 < rows.len() {
613            out.push('\n');
614        }
615    }
616    out
617}
618
619// ── Fragment construction from parsed content ───────────────────
620
621/// Convert parsed blocks (from HTML or Markdown parser) into a `DocumentFragment`.
622/// Convert a `ParsedSpan` to a `FragmentElement`.
623fn span_to_fragment_element(span: &ParsedSpan) -> FragmentElement {
624    let content = InlineContent::Text(span.text.clone());
625    let fmt_font_family = if span.code {
626        Some("monospace".into())
627    } else {
628        None
629    };
630    let fmt_font_bold = if span.bold { Some(true) } else { None };
631    let fmt_font_italic = if span.italic { Some(true) } else { None };
632    let fmt_font_underline = if span.underline { Some(true) } else { None };
633    let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
634    let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
635        (Some(href.clone()), Some(true))
636    } else {
637        (None, None)
638    };
639
640    FragmentElement {
641        content,
642        fmt_font_family,
643        fmt_font_point_size: None,
644        fmt_font_weight: None,
645        fmt_font_bold,
646        fmt_font_italic,
647        fmt_font_underline,
648        fmt_font_overline: None,
649        fmt_font_strikeout,
650        fmt_letter_spacing: None,
651        fmt_word_spacing: None,
652        fmt_anchor_href,
653        fmt_anchor_names: vec![],
654        fmt_is_anchor,
655        fmt_tooltip: None,
656        fmt_underline_style: None,
657        fmt_vertical_alignment: None,
658    }
659}
660
661/// Convert parsed elements (blocks + tables) into a `DocumentFragment`,
662/// preserving table structure as `FragmentTable` entries.
663fn parsed_elements_to_fragment(parsed: Vec<ParsedElement>) -> DocumentFragment {
664    use frontend::common::parser_tools::fragment_schema::FragmentList;
665
666    let mut blocks: Vec<FragmentBlock> = Vec::new();
667    let mut tables: Vec<FragmentTable> = Vec::new();
668
669    for elem in parsed {
670        match elem {
671            ParsedElement::Block(pb) => {
672                let elements: Vec<FragmentElement> =
673                    pb.spans.iter().map(span_to_fragment_element).collect();
674                let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
675                let list = pb.list_style.map(|style| FragmentList {
676                    style,
677                    indent: pb.list_indent as i64,
678                    prefix: String::new(),
679                    suffix: String::new(),
680                });
681
682                blocks.push(FragmentBlock {
683                    plain_text,
684                    elements,
685                    heading_level: pb.heading_level,
686                    list,
687                    alignment: None,
688                    indent: None,
689                    text_indent: None,
690                    marker: None,
691                    top_margin: None,
692                    bottom_margin: None,
693                    left_margin: None,
694                    right_margin: None,
695                    tab_positions: vec![],
696                    line_height: pb.line_height,
697                    non_breakable_lines: pb.non_breakable_lines,
698                    direction: pb.direction,
699                    background_color: pb.background_color,
700                    is_code_block: None,
701                    code_language: None,
702                    hyphenate: None,
703                    language: None,
704                });
705            }
706            ParsedElement::Table(pt) => {
707                let block_insert_index = blocks.len();
708                let num_columns = pt.rows.iter().map(|r| r.len()).max().unwrap_or(0);
709                let num_rows = pt.rows.len();
710
711                let mut frag_cells: Vec<FragmentTableCell> = Vec::new();
712                for (row_idx, row) in pt.rows.iter().enumerate() {
713                    for (col_idx, cell) in row.iter().enumerate() {
714                        let cell_elements: Vec<FragmentElement> =
715                            cell.spans.iter().map(span_to_fragment_element).collect();
716                        let cell_text: String =
717                            cell.spans.iter().map(|s| s.text.as_str()).collect();
718
719                        frag_cells.push(FragmentTableCell {
720                            row: row_idx,
721                            column: col_idx,
722                            row_span: 1,
723                            column_span: 1,
724                            blocks: vec![FragmentBlock {
725                                plain_text: cell_text,
726                                elements: cell_elements,
727                                heading_level: None,
728                                list: None,
729                                alignment: None,
730                                indent: None,
731                                text_indent: None,
732                                marker: None,
733                                top_margin: None,
734                                bottom_margin: None,
735                                left_margin: None,
736                                right_margin: None,
737                                tab_positions: vec![],
738                                line_height: None,
739                                non_breakable_lines: None,
740                                direction: None,
741                                background_color: None,
742                                is_code_block: None,
743                                code_language: None,
744                                hyphenate: None,
745                                language: None,
746                            }],
747                            fmt_padding: None,
748                            fmt_border: None,
749                            fmt_vertical_alignment: None,
750                            fmt_background_color: None,
751                        });
752                    }
753                }
754
755                tables.push(FragmentTable {
756                    rows: num_rows,
757                    columns: num_columns,
758                    cells: frag_cells,
759                    block_insert_index,
760                    fmt_border: None,
761                    fmt_cell_spacing: None,
762                    fmt_cell_padding: None,
763                    fmt_width: None,
764                    fmt_alignment: None,
765                    column_widths: vec![],
766                });
767            }
768        }
769    }
770
771    let data = serde_json::to_string(&FragmentData { blocks, tables })
772        .expect("fragment serialization should not fail");
773
774    let plain_text = parsed_plain_text_from_data(&data);
775
776    DocumentFragment { data, plain_text }
777}
778
779/// Extract plain text from serialized fragment data.
780fn parsed_plain_text_from_data(data: &str) -> String {
781    let fragment_data: FragmentData = match serde_json::from_str(data) {
782        Ok(d) => d,
783        Err(_) => return String::new(),
784    };
785
786    fragment_data
787        .blocks
788        .iter()
789        .map(|b| b.plain_text.as_str())
790        .collect::<Vec<_>>()
791        .join("\n")
792}