Skip to main content

text_document/
fragment.rs

1//! DocumentFragment — format-agnostic rich text interchange type.
2
3use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::content_parser::{ParsedElement, ParsedSpan};
5use frontend::common::parser_tools::fragment_schema::{
6    FragmentBlock, FragmentData, FragmentElement, FragmentTable, FragmentTableCell,
7};
8
/// A piece of rich text that can be inserted into a [`TextDocument`](crate::TextDocument).
///
/// `DocumentFragment` is the clipboard/interchange type. It carries
/// blocks, per-character format runs, image anchors, and structural
/// metadata in a format-agnostic internal representation.
///
/// The fragment is held as two strings: a serialized payload and a
/// plain-text rendering of the same content.
#[derive(Debug, Clone)]
pub struct DocumentFragment {
    /// Serialized fragment payload. The constructors in this file store the
    /// JSON encoding of a `FragmentData`; an empty string means "no payload".
    data: String,
    /// Plain-text rendering of the fragment.
    plain_text: String,
}
19
20impl DocumentFragment {
21    /// Create an empty fragment.
22    pub fn new() -> Self {
23        Self {
24            data: String::new(),
25            plain_text: String::new(),
26        }
27    }
28
29    /// Create a fragment from plain text.
30    ///
31    /// Builds valid fragment data so the fragment can be inserted via
32    /// [`TextCursor::insert_fragment`](crate::TextCursor::insert_fragment).
33    pub fn from_plain_text(text: &str) -> Self {
34        let blocks: Vec<FragmentBlock> = text
35            .split('\n')
36            .map(|line| FragmentBlock {
37                plain_text: line.to_string(),
38                elements: vec![FragmentElement {
39                    content: InlineContent::Text(line.to_string()),
40                    fmt_font_family: None,
41                    fmt_font_point_size: None,
42                    fmt_font_weight: None,
43                    fmt_font_bold: None,
44                    fmt_font_italic: None,
45                    fmt_font_underline: None,
46                    fmt_font_overline: None,
47                    fmt_font_strikeout: None,
48                    fmt_letter_spacing: None,
49                    fmt_word_spacing: None,
50                    fmt_anchor_href: None,
51                    fmt_anchor_names: vec![],
52                    fmt_is_anchor: None,
53                    fmt_tooltip: None,
54                    fmt_underline_style: None,
55                    fmt_vertical_alignment: None,
56                }],
57                heading_level: None,
58                list: None,
59                alignment: None,
60                indent: None,
61                text_indent: None,
62                marker: None,
63                top_margin: None,
64                bottom_margin: None,
65                left_margin: None,
66                right_margin: None,
67                tab_positions: vec![],
68                line_height: None,
69                non_breakable_lines: None,
70                direction: None,
71                background_color: None,
72                is_code_block: None,
73                code_language: None,
74            })
75            .collect();
76
77        let data = serde_json::to_string(&FragmentData {
78            blocks,
79            tables: vec![],
80        })
81        .expect("fragment serialization should not fail");
82
83        Self {
84            data,
85            plain_text: text.to_string(),
86        }
87    }
88
89    /// Create a fragment from HTML.
90    pub fn from_html(html: &str) -> Self {
91        let parsed = frontend::common::parser_tools::content_parser::parse_html_elements(html);
92        parsed_elements_to_fragment(parsed)
93    }
94
95    /// Create a fragment from Markdown.
96    pub fn from_markdown(markdown: &str) -> Self {
97        let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
98        parsed_elements_to_fragment(parsed)
99    }
100
101    /// Create a fragment from an entire document.
102    pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
103        let inner = doc.inner.lock();
104        // Use i64::MAX as anchor to ensure the full document is captured.
105        // Document positions include inter-block gaps, so character_count
106        // alone would truncate the last block.
107        let dto = frontend::document_inspection::ExtractFragmentDto {
108            position: 0,
109            anchor: i64::MAX,
110        };
111        let result =
112            frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
113        Ok(Self::from_raw(result.fragment_data, result.plain_text))
114    }
115
116    /// Create a fragment from the serialized internal format.
117    pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
118        Self { data, plain_text }
119    }
120
121    /// Export the fragment as plain text.
122    pub fn to_plain_text(&self) -> &str {
123        &self.plain_text
124    }
125
126    /// Export the fragment as HTML.
127    pub fn to_html(&self) -> String {
128        if self.data.is_empty() {
129            return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
130        }
131
132        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
133            Ok(d) => d,
134            Err(_) => {
135                return String::from(
136                    "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
137                );
138            }
139        };
140
141        let mut body = String::new();
142        let blocks = &fragment_data.blocks;
143
144        // Single inline-only block with no tables: emit inline HTML without block wrapper
145        if blocks.len() == 1 && blocks[0].is_inline_only() && fragment_data.tables.is_empty() {
146            push_inline_html(&mut body, &blocks[0].elements);
147            return format!(
148                "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
149                body
150            );
151        }
152
153        // Sort tables by block_insert_index so we can interleave them
154        let mut sorted_tables: Vec<&FragmentTable> = fragment_data.tables.iter().collect();
155        sorted_tables.sort_by_key(|t| t.block_insert_index);
156        let mut table_cursor = 0;
157
158        let mut i = 0;
159
160        while i < blocks.len() {
161            // Insert any tables whose block_insert_index == i
162            while table_cursor < sorted_tables.len()
163                && sorted_tables[table_cursor].block_insert_index <= i
164            {
165                push_table_html(&mut body, sorted_tables[table_cursor]);
166                table_cursor += 1;
167            }
168
169            let block = &blocks[i];
170
171            if let Some(ref list) = block.list {
172                let is_ordered = is_ordered_list_style(&list.style);
173                let list_tag = if is_ordered { "ol" } else { "ul" };
174                body.push('<');
175                body.push_str(list_tag);
176                body.push('>');
177
178                while i < blocks.len() {
179                    let b = &blocks[i];
180                    match &b.list {
181                        Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
182                            body.push_str("<li>");
183                            push_inline_html(&mut body, &b.elements);
184                            body.push_str("</li>");
185                            i += 1;
186                        }
187                        _ => break,
188                    }
189                }
190
191                body.push_str("</");
192                body.push_str(list_tag);
193                body.push('>');
194            } else if let Some(level) = block.heading_level {
195                let n = level.clamp(1, 6);
196                body.push_str(&format!("<h{}>", n));
197                push_inline_html(&mut body, &block.elements);
198                body.push_str(&format!("</h{}>", n));
199                i += 1;
200            } else {
201                // Emit block-level formatting as inline styles (ISSUE-19)
202                let style = block_style_attr(block);
203                if style.is_empty() {
204                    body.push_str("<p>");
205                } else {
206                    body.push_str(&format!("<p style=\"{}\">", style));
207                }
208                push_inline_html(&mut body, &block.elements);
209                body.push_str("</p>");
210                i += 1;
211            }
212        }
213
214        // Emit any remaining tables after all blocks
215        while table_cursor < sorted_tables.len() {
216            push_table_html(&mut body, sorted_tables[table_cursor]);
217            table_cursor += 1;
218        }
219
220        format!(
221            "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
222            body
223        )
224    }
225
226    /// Export the fragment as Markdown.
227    pub fn to_markdown(&self) -> String {
228        if self.data.is_empty() {
229            return String::new();
230        }
231
232        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
233            Ok(d) => d,
234            Err(_) => return String::new(),
235        };
236
237        // (rendered_text, is_list_item) — used for join logic
238        let mut parts: Vec<(String, bool)> = Vec::new();
239        let mut prev_was_list = false;
240        let mut list_counter: u32 = 0;
241
242        // Sort tables by block_insert_index for interleaving
243        let mut sorted_tables: Vec<&FragmentTable> = fragment_data.tables.iter().collect();
244        sorted_tables.sort_by_key(|t| t.block_insert_index);
245        let mut table_cursor = 0;
246
247        for (blk_idx, block) in fragment_data.blocks.iter().enumerate() {
248            // Insert tables before this block index
249            while table_cursor < sorted_tables.len()
250                && sorted_tables[table_cursor].block_insert_index <= blk_idx
251            {
252                parts.push((render_table_markdown(sorted_tables[table_cursor]), false));
253                prev_was_list = false;
254                list_counter = 0;
255                table_cursor += 1;
256            }
257
258            let inline_text = render_inline_markdown(&block.elements);
259            let is_list = block.list.is_some();
260
261            let indent_prefix = match block.indent {
262                Some(n) if n > 0 => "  ".repeat(n as usize),
263                _ => String::new(),
264            };
265
266            if let Some(level) = block.heading_level {
267                let n = level.clamp(1, 6) as usize;
268                let prefix = "#".repeat(n);
269                parts.push((format!("{} {}", prefix, inline_text), false));
270                prev_was_list = false;
271                list_counter = 0;
272            } else if let Some(ref list) = block.list {
273                let is_ordered = is_ordered_list_style(&list.style);
274                if !prev_was_list {
275                    list_counter = 0;
276                }
277                if is_ordered {
278                    list_counter += 1;
279                    parts.push((
280                        format!("{}{}. {}", indent_prefix, list_counter, inline_text),
281                        true,
282                    ));
283                } else {
284                    parts.push((format!("{}- {}", indent_prefix, inline_text), true));
285                }
286                prev_was_list = true;
287            } else {
288                if indent_prefix.is_empty() {
289                    parts.push((inline_text, false));
290                } else {
291                    parts.push((format!("{}{}", indent_prefix, inline_text), false));
292                }
293                prev_was_list = false;
294                list_counter = 0;
295            }
296
297            if !is_list {
298                prev_was_list = false;
299            }
300        }
301
302        // Emit remaining tables after all blocks
303        while table_cursor < sorted_tables.len() {
304            parts.push((render_table_markdown(sorted_tables[table_cursor]), false));
305            table_cursor += 1;
306        }
307
308        // Join: list items with \n, others with \n\n
309        let mut result = String::new();
310        for (idx, (text, is_list)) in parts.iter().enumerate() {
311            if idx > 0 {
312                let (_, prev_is_list) = &parts[idx - 1];
313                if *prev_is_list && *is_list {
314                    result.push('\n');
315                } else {
316                    result.push_str("\n\n");
317                }
318            }
319            result.push_str(text);
320        }
321
322        result
323    }
324
325    /// Returns true if the fragment contains no text or elements.
326    pub fn is_empty(&self) -> bool {
327        self.plain_text.is_empty()
328    }
329
330    /// Returns the serialized internal representation.
331    pub(crate) fn raw_data(&self) -> &str {
332        &self.data
333    }
334}
335
336impl Default for DocumentFragment {
337    fn default() -> Self {
338        Self::new()
339    }
340}
341
342// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
343// Shared helpers (used by both to_html and to_markdown)
344// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
345
346fn is_ordered_list_style(style: &ListStyle) -> bool {
347    matches!(
348        style,
349        ListStyle::Decimal
350            | ListStyle::LowerAlpha
351            | ListStyle::UpperAlpha
352            | ListStyle::LowerRoman
353            | ListStyle::UpperRoman
354    )
355}
356
357// ── HTML helpers ────────────────────────────────────────────────
358
/// Escape `s` for use in HTML text content or inside a quoted attribute value.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by character references, and a
/// carriage return is emitted as `&#13;`. All other characters are copied
/// through unchanged.
fn escape_html(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        let entity = match c {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            '\'' => "&#x27;",
            // A raw CR in text content is normalised to LF by the HTML5 input
            // preprocessor on re-import (CR-from-`&#xD;` survives, literal CR
            // does not), which breaks serialiser idempotency. Emit it as a
            // numeric reference so it round-trips losslessly.
            '\r' => "&#13;",
            _ => {
                out.push(c);
                continue;
            }
        };
        out.push_str(entity);
    }
    out
}
378
379/// Build a CSS `style` attribute value from block-level formatting (ISSUE-19).
380fn block_style_attr(block: &FragmentBlock) -> String {
381    use crate::Alignment;
382
383    let mut parts = Vec::new();
384    if let Some(ref alignment) = block.alignment {
385        let value = match alignment {
386            Alignment::Left => "left",
387            Alignment::Right => "right",
388            Alignment::Center => "center",
389            Alignment::Justify => "justify",
390        };
391        parts.push(format!("text-align: {}", value));
392    }
393    if let Some(n) = block.indent
394        && n > 0
395    {
396        parts.push(format!("margin-left: {}em", n));
397    }
398    if let Some(px) = block.text_indent
399        && px != 0
400    {
401        parts.push(format!("text-indent: {}px", px));
402    }
403    if let Some(px) = block.top_margin {
404        parts.push(format!("margin-top: {}px", px));
405    }
406    if let Some(px) = block.bottom_margin {
407        parts.push(format!("margin-bottom: {}px", px));
408    }
409    if let Some(px) = block.left_margin {
410        parts.push(format!("margin-left: {}px", px));
411    }
412    if let Some(px) = block.right_margin {
413        parts.push(format!("margin-right: {}px", px));
414    }
415    parts.join("; ")
416}
417
418fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
419    for elem in elements {
420        let text = match &elem.content {
421            InlineContent::Text(t) => escape_html(t),
422            InlineContent::Image {
423                name,
424                width,
425                height,
426                ..
427            } => {
428                format!(
429                    "<img src=\"{}\" width=\"{}\" height=\"{}\">",
430                    escape_html(name),
431                    width,
432                    height
433                )
434            }
435            InlineContent::Empty => String::new(),
436        };
437
438        let is_monospace = elem
439            .fmt_font_family
440            .as_deref()
441            .is_some_and(|f| f == "monospace");
442        let is_bold = elem.fmt_font_bold.unwrap_or(false);
443        let is_italic = elem.fmt_font_italic.unwrap_or(false);
444        let is_underline = elem.fmt_font_underline.unwrap_or(false);
445        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
446        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
447
448        let mut result = text;
449
450        if is_monospace {
451            result = format!("<code>{}</code>", result);
452        }
453        if is_bold {
454            result = format!("<strong>{}</strong>", result);
455        }
456        if is_italic {
457            result = format!("<em>{}</em>", result);
458        }
459        if is_underline {
460            result = format!("<u>{}</u>", result);
461        }
462        if is_strikeout {
463            result = format!("<s>{}</s>", result);
464        }
465        if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
466            result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
467        }
468
469        out.push_str(&result);
470    }
471}
472
473/// Emit an HTML `<table>` for a `FragmentTable`.
474fn push_table_html(out: &mut String, table: &FragmentTable) {
475    out.push_str("<table>");
476    for row in 0..table.rows {
477        out.push_str("<tr>");
478        for col in 0..table.columns {
479            if let Some(cell) = table.cells.iter().find(|c| c.row == row && c.column == col) {
480                out.push_str("<td");
481                if cell.row_span > 1 {
482                    out.push_str(&format!(" rowspan=\"{}\"", cell.row_span));
483                }
484                if cell.column_span > 1 {
485                    out.push_str(&format!(" colspan=\"{}\"", cell.column_span));
486                }
487                out.push('>');
488                for (i, block) in cell.blocks.iter().enumerate() {
489                    if i > 0 {
490                        out.push_str("<br>");
491                    }
492                    push_inline_html(out, &block.elements);
493                }
494                out.push_str("</td>");
495            }
496            // Skip positions covered by spans — the HTML renderer handles them.
497        }
498        out.push_str("</tr>");
499    }
500    out.push_str("</table>");
501}
502
503// ── Markdown helpers ────────────────────────────────────────────
504
/// Backslash-escape every character in `s` that can act as Markdown syntax.
///
/// The escaped set is: `\`, `` ` ``, `*`, `_`, `{`, `}`, `[`, `]`, `(`, `)`,
/// `#`, `+`, `-`, `.`, `!`, `|`, `~`, `<` and `>`. Characters are escaped
/// wherever they occur, without regard to position; everything else is
/// copied through unchanged.
fn escape_markdown(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        let needs_escape = matches!(
            c,
            '\\' | '`'
                | '*'
                | '_'
                | '{'
                | '}'
                | '['
                | ']'
                | '('
                | ')'
                | '#'
                | '+'
                | '-'
                | '.'
                | '!'
                | '|'
                | '~'
                | '<'
                | '>'
        );
        if needs_escape {
            out.push('\\');
        }
        out.push(c);
    }
    out
}
535
536fn render_inline_markdown(elements: &[FragmentElement]) -> String {
537    let mut out = String::new();
538    for elem in elements {
539        let raw_text = match &elem.content {
540            InlineContent::Text(t) => t.clone(),
541            InlineContent::Image { name, .. } => format!("![{}]({})", name, name),
542            InlineContent::Empty => String::new(),
543        };
544
545        let is_monospace = elem
546            .fmt_font_family
547            .as_deref()
548            .is_some_and(|f| f == "monospace");
549        let is_bold = elem.fmt_font_bold.unwrap_or(false);
550        let is_italic = elem.fmt_font_italic.unwrap_or(false);
551        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
552        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
553
554        if is_monospace {
555            out.push('`');
556            out.push_str(&raw_text);
557            out.push('`');
558        } else {
559            let mut text = escape_markdown(&raw_text);
560            if is_bold && is_italic {
561                text = format!("***{}***", text);
562            } else if is_bold {
563                text = format!("**{}**", text);
564            } else if is_italic {
565                text = format!("*{}*", text);
566            }
567            if is_strikeout {
568                text = format!("~~{}~~", text);
569            }
570            if is_anchor {
571                let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
572                out.push_str(&format!("[{}]({})", text, href));
573            } else {
574                out.push_str(&text);
575            }
576        }
577    }
578    out
579}
580
581/// Render a `FragmentTable` as a pipe-delimited Markdown table.
582fn render_table_markdown(table: &FragmentTable) -> String {
583    let mut rows: Vec<Vec<String>> = vec![vec![String::new(); table.columns]; table.rows];
584
585    for cell in &table.cells {
586        let text: String = cell
587            .blocks
588            .iter()
589            .map(|b| render_inline_markdown(&b.elements))
590            .collect::<Vec<_>>()
591            .join(" ");
592        if cell.row < table.rows && cell.column < table.columns {
593            rows[cell.row][cell.column] = text;
594        }
595    }
596
597    let mut out = String::new();
598    for (i, row) in rows.iter().enumerate() {
599        out.push_str("| ");
600        out.push_str(&row.join(" | "));
601        out.push_str(" |");
602        if i == 0 {
603            // Header separator
604            out.push('\n');
605            out.push('|');
606            for _ in 0..table.columns {
607                out.push_str(" --- |");
608            }
609        }
610        if i + 1 < rows.len() {
611            out.push('\n');
612        }
613    }
614    out
615}
616
617// ── Fragment construction from parsed content ───────────────────
618
619/// Convert parsed blocks (from HTML or Markdown parser) into a `DocumentFragment`.
620/// Convert a `ParsedSpan` to a `FragmentElement`.
621fn span_to_fragment_element(span: &ParsedSpan) -> FragmentElement {
622    let content = InlineContent::Text(span.text.clone());
623    let fmt_font_family = if span.code {
624        Some("monospace".into())
625    } else {
626        None
627    };
628    let fmt_font_bold = if span.bold { Some(true) } else { None };
629    let fmt_font_italic = if span.italic { Some(true) } else { None };
630    let fmt_font_underline = if span.underline { Some(true) } else { None };
631    let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
632    let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
633        (Some(href.clone()), Some(true))
634    } else {
635        (None, None)
636    };
637
638    FragmentElement {
639        content,
640        fmt_font_family,
641        fmt_font_point_size: None,
642        fmt_font_weight: None,
643        fmt_font_bold,
644        fmt_font_italic,
645        fmt_font_underline,
646        fmt_font_overline: None,
647        fmt_font_strikeout,
648        fmt_letter_spacing: None,
649        fmt_word_spacing: None,
650        fmt_anchor_href,
651        fmt_anchor_names: vec![],
652        fmt_is_anchor,
653        fmt_tooltip: None,
654        fmt_underline_style: None,
655        fmt_vertical_alignment: None,
656    }
657}
658
659/// Convert parsed elements (blocks + tables) into a `DocumentFragment`,
660/// preserving table structure as `FragmentTable` entries.
661fn parsed_elements_to_fragment(parsed: Vec<ParsedElement>) -> DocumentFragment {
662    use frontend::common::parser_tools::fragment_schema::FragmentList;
663
664    let mut blocks: Vec<FragmentBlock> = Vec::new();
665    let mut tables: Vec<FragmentTable> = Vec::new();
666
667    for elem in parsed {
668        match elem {
669            ParsedElement::Block(pb) => {
670                let elements: Vec<FragmentElement> =
671                    pb.spans.iter().map(span_to_fragment_element).collect();
672                let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
673                let list = pb.list_style.map(|style| FragmentList {
674                    style,
675                    indent: pb.list_indent as i64,
676                    prefix: String::new(),
677                    suffix: String::new(),
678                });
679
680                blocks.push(FragmentBlock {
681                    plain_text,
682                    elements,
683                    heading_level: pb.heading_level,
684                    list,
685                    alignment: None,
686                    indent: None,
687                    text_indent: None,
688                    marker: None,
689                    top_margin: None,
690                    bottom_margin: None,
691                    left_margin: None,
692                    right_margin: None,
693                    tab_positions: vec![],
694                    line_height: pb.line_height,
695                    non_breakable_lines: pb.non_breakable_lines,
696                    direction: pb.direction,
697                    background_color: pb.background_color,
698                    is_code_block: None,
699                    code_language: None,
700                });
701            }
702            ParsedElement::Table(pt) => {
703                let block_insert_index = blocks.len();
704                let num_columns = pt.rows.iter().map(|r| r.len()).max().unwrap_or(0);
705                let num_rows = pt.rows.len();
706
707                let mut frag_cells: Vec<FragmentTableCell> = Vec::new();
708                for (row_idx, row) in pt.rows.iter().enumerate() {
709                    for (col_idx, cell) in row.iter().enumerate() {
710                        let cell_elements: Vec<FragmentElement> =
711                            cell.spans.iter().map(span_to_fragment_element).collect();
712                        let cell_text: String =
713                            cell.spans.iter().map(|s| s.text.as_str()).collect();
714
715                        frag_cells.push(FragmentTableCell {
716                            row: row_idx,
717                            column: col_idx,
718                            row_span: 1,
719                            column_span: 1,
720                            blocks: vec![FragmentBlock {
721                                plain_text: cell_text,
722                                elements: cell_elements,
723                                heading_level: None,
724                                list: None,
725                                alignment: None,
726                                indent: None,
727                                text_indent: None,
728                                marker: None,
729                                top_margin: None,
730                                bottom_margin: None,
731                                left_margin: None,
732                                right_margin: None,
733                                tab_positions: vec![],
734                                line_height: None,
735                                non_breakable_lines: None,
736                                direction: None,
737                                background_color: None,
738                                is_code_block: None,
739                                code_language: None,
740                            }],
741                            fmt_padding: None,
742                            fmt_border: None,
743                            fmt_vertical_alignment: None,
744                            fmt_background_color: None,
745                        });
746                    }
747                }
748
749                tables.push(FragmentTable {
750                    rows: num_rows,
751                    columns: num_columns,
752                    cells: frag_cells,
753                    block_insert_index,
754                    fmt_border: None,
755                    fmt_cell_spacing: None,
756                    fmt_cell_padding: None,
757                    fmt_width: None,
758                    fmt_alignment: None,
759                    column_widths: vec![],
760                });
761            }
762        }
763    }
764
765    let data = serde_json::to_string(&FragmentData { blocks, tables })
766        .expect("fragment serialization should not fail");
767
768    let plain_text = parsed_plain_text_from_data(&data);
769
770    DocumentFragment { data, plain_text }
771}
772
773/// Extract plain text from serialized fragment data.
774fn parsed_plain_text_from_data(data: &str) -> String {
775    let fragment_data: FragmentData = match serde_json::from_str(data) {
776        Ok(d) => d,
777        Err(_) => return String::new(),
778    };
779
780    fragment_data
781        .blocks
782        .iter()
783        .map(|b| b.plain_text.as_str())
784        .collect::<Vec<_>>()
785        .join("\n")
786}