Skip to main content

text_document/
fragment.rs

1//! DocumentFragment — format-agnostic rich text interchange type.
2
3use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::fragment_schema::{
5    FragmentBlock, FragmentData, FragmentElement,
6};
7
/// A piece of rich text that can be inserted into a [`TextDocument`](crate::TextDocument).
///
/// `DocumentFragment` is the clipboard/interchange type. It carries
/// blocks, inline elements, and formatting in a format-agnostic
/// internal representation.
#[derive(Debug, Clone)]
pub struct DocumentFragment {
    /// Serialized fragment payload. The constructors in this file store the
    /// JSON encoding of a `FragmentData`, and the exporters parse it back as
    /// such; it is the empty string for a fragment created with `new()`.
    data: String,
    /// Plain-text rendering of the fragment, as returned by `to_plain_text()`.
    plain_text: String,
}
18
19impl DocumentFragment {
20    /// Create an empty fragment.
21    pub fn new() -> Self {
22        Self {
23            data: String::new(),
24            plain_text: String::new(),
25        }
26    }
27
28    /// Create a fragment from plain text.
29    ///
30    /// Builds valid fragment data so the fragment can be inserted via
31    /// [`TextCursor::insert_fragment`](crate::TextCursor::insert_fragment).
32    pub fn from_plain_text(text: &str) -> Self {
33        let blocks: Vec<FragmentBlock> = text
34            .split('\n')
35            .map(|line| FragmentBlock {
36                plain_text: line.to_string(),
37                elements: vec![FragmentElement {
38                    content: InlineContent::Text(line.to_string()),
39                    fmt_font_family: None,
40                    fmt_font_point_size: None,
41                    fmt_font_weight: None,
42                    fmt_font_bold: None,
43                    fmt_font_italic: None,
44                    fmt_font_underline: None,
45                    fmt_font_overline: None,
46                    fmt_font_strikeout: None,
47                    fmt_letter_spacing: None,
48                    fmt_word_spacing: None,
49                    fmt_anchor_href: None,
50                    fmt_anchor_names: vec![],
51                    fmt_is_anchor: None,
52                    fmt_tooltip: None,
53                    fmt_underline_style: None,
54                    fmt_vertical_alignment: None,
55                }],
56                heading_level: None,
57                list: None,
58                alignment: None,
59                indent: None,
60                text_indent: None,
61                marker: None,
62                top_margin: None,
63                bottom_margin: None,
64                left_margin: None,
65                right_margin: None,
66                tab_positions: vec![],
67                line_height: None,
68                non_breakable_lines: None,
69                direction: None,
70                background_color: None,
71                is_code_block: None,
72                code_language: None,
73            })
74            .collect();
75
76        let data = serde_json::to_string(&FragmentData {
77            blocks,
78            tables: vec![],
79        })
80        .expect("fragment serialization should not fail");
81
82        Self {
83            data,
84            plain_text: text.to_string(),
85        }
86    }
87
88    /// Create a fragment from HTML.
89    pub fn from_html(html: &str) -> Self {
90        let parsed = frontend::common::parser_tools::content_parser::parse_html(html);
91        parsed_blocks_to_fragment(parsed)
92    }
93
94    /// Create a fragment from Markdown.
95    pub fn from_markdown(markdown: &str) -> Self {
96        let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
97        let blocks =
98            frontend::common::parser_tools::content_parser::ParsedElement::flatten_to_blocks(
99                parsed,
100            );
101        parsed_blocks_to_fragment(blocks)
102    }
103
104    /// Create a fragment from an entire document.
105    pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
106        let inner = doc.inner.lock();
107        let char_count = {
108            let stats =
109                frontend::commands::document_inspection_commands::get_document_stats(&inner.ctx)?;
110            crate::convert::to_usize(stats.character_count)
111        };
112        let dto = frontend::document_inspection::ExtractFragmentDto {
113            position: 0,
114            anchor: crate::convert::to_i64(char_count),
115        };
116        let result =
117            frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
118        Ok(Self::from_raw(result.fragment_data, result.plain_text))
119    }
120
121    /// Create a fragment from the serialized internal format.
122    pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
123        Self { data, plain_text }
124    }
125
126    /// Export the fragment as plain text.
127    pub fn to_plain_text(&self) -> &str {
128        &self.plain_text
129    }
130
131    /// Export the fragment as HTML.
132    pub fn to_html(&self) -> String {
133        if self.data.is_empty() {
134            return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
135        }
136
137        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
138            Ok(d) => d,
139            Err(_) => {
140                return String::from(
141                    "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
142                );
143            }
144        };
145
146        let mut body = String::new();
147        let blocks = &fragment_data.blocks;
148
149        // Single inline-only block: emit inline HTML without block wrapper
150        if blocks.len() == 1 && blocks[0].is_inline_only() {
151            push_inline_html(&mut body, &blocks[0].elements);
152            return format!(
153                "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
154                body
155            );
156        }
157
158        let mut i = 0;
159
160        while i < blocks.len() {
161            let block = &blocks[i];
162
163            if let Some(ref list) = block.list {
164                let is_ordered = is_ordered_list_style(&list.style);
165                let list_tag = if is_ordered { "ol" } else { "ul" };
166                body.push('<');
167                body.push_str(list_tag);
168                body.push('>');
169
170                while i < blocks.len() {
171                    let b = &blocks[i];
172                    match &b.list {
173                        Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
174                            body.push_str("<li>");
175                            push_inline_html(&mut body, &b.elements);
176                            body.push_str("</li>");
177                            i += 1;
178                        }
179                        _ => break,
180                    }
181                }
182
183                body.push_str("</");
184                body.push_str(list_tag);
185                body.push('>');
186            } else if let Some(level) = block.heading_level {
187                let n = level.clamp(1, 6);
188                body.push_str(&format!("<h{}>", n));
189                push_inline_html(&mut body, &block.elements);
190                body.push_str(&format!("</h{}>", n));
191                i += 1;
192            } else {
193                // Emit block-level formatting as inline styles (ISSUE-19)
194                let style = block_style_attr(block);
195                if style.is_empty() {
196                    body.push_str("<p>");
197                } else {
198                    body.push_str(&format!("<p style=\"{}\">", style));
199                }
200                push_inline_html(&mut body, &block.elements);
201                body.push_str("</p>");
202                i += 1;
203            }
204        }
205
206        format!(
207            "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
208            body
209        )
210    }
211
212    /// Export the fragment as Markdown.
213    pub fn to_markdown(&self) -> String {
214        if self.data.is_empty() {
215            return String::new();
216        }
217
218        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
219            Ok(d) => d,
220            Err(_) => return String::new(),
221        };
222
223        let mut parts: Vec<String> = Vec::new();
224        let mut prev_was_list = false;
225        let mut list_counter: u32 = 0;
226
227        for block in &fragment_data.blocks {
228            let inline_text = render_inline_markdown(&block.elements);
229            let is_list = block.list.is_some();
230
231            // Markdown indent prefix from block indent level (ISSUE-19)
232            let indent_prefix = match block.indent {
233                Some(n) if n > 0 => "  ".repeat(n as usize),
234                _ => String::new(),
235            };
236
237            if let Some(level) = block.heading_level {
238                let n = level.clamp(1, 6) as usize;
239                let prefix = "#".repeat(n);
240                parts.push(format!("{} {}", prefix, inline_text));
241                prev_was_list = false;
242                list_counter = 0;
243            } else if let Some(ref list) = block.list {
244                let is_ordered = is_ordered_list_style(&list.style);
245                if !prev_was_list {
246                    list_counter = 0;
247                }
248                if is_ordered {
249                    list_counter += 1;
250                    parts.push(format!(
251                        "{}{}. {}",
252                        indent_prefix, list_counter, inline_text
253                    ));
254                } else {
255                    parts.push(format!("{}- {}", indent_prefix, inline_text));
256                }
257                prev_was_list = true;
258            } else {
259                // Prepend blockquote-style indent for indented paragraphs
260                if indent_prefix.is_empty() {
261                    parts.push(inline_text);
262                } else {
263                    parts.push(format!("{}{}", indent_prefix, inline_text));
264                }
265                prev_was_list = false;
266                list_counter = 0;
267            }
268
269            if !is_list {
270                prev_was_list = false;
271            }
272        }
273
274        // Join: list items with \n, others with \n\n
275        let mut result = String::new();
276        let blocks = &fragment_data.blocks;
277        for (idx, part) in parts.iter().enumerate() {
278            if idx > 0 {
279                let prev_is_list = blocks[idx - 1].list.is_some();
280                let curr_is_list = blocks[idx].list.is_some();
281                if prev_is_list && curr_is_list {
282                    result.push('\n');
283                } else {
284                    result.push_str("\n\n");
285                }
286            }
287            result.push_str(part);
288        }
289
290        result
291    }
292
293    /// Returns true if the fragment contains no text or elements.
294    pub fn is_empty(&self) -> bool {
295        self.plain_text.is_empty()
296    }
297
298    /// Returns the serialized internal representation.
299    pub(crate) fn raw_data(&self) -> &str {
300        &self.data
301    }
302}
303
304impl Default for DocumentFragment {
305    fn default() -> Self {
306        Self::new()
307    }
308}
309
310// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
311// Shared helpers (used by both to_html and to_markdown)
312// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
313
314fn is_ordered_list_style(style: &ListStyle) -> bool {
315    matches!(
316        style,
317        ListStyle::Decimal
318            | ListStyle::LowerAlpha
319            | ListStyle::UpperAlpha
320            | ListStyle::LowerRoman
321            | ListStyle::UpperRoman
322    )
323}
324
325// ── HTML helpers ────────────────────────────────────────────────
326
/// Escape `s` for use in HTML text and in double-quoted attribute values.
///
/// Replaces `&`, `<`, `>`, `"` and `'` with `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&#x27;`; every other character is copied through unchanged.
fn escape_html(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let entity = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            '\'' => "&#x27;",
            other => {
                escaped.push(other);
                continue;
            }
        };
        escaped.push_str(entity);
    }
    escaped
}
341
342/// Build a CSS `style` attribute value from block-level formatting (ISSUE-19).
343fn block_style_attr(block: &FragmentBlock) -> String {
344    use crate::Alignment;
345
346    let mut parts = Vec::new();
347    if let Some(ref alignment) = block.alignment {
348        let value = match alignment {
349            Alignment::Left => "left",
350            Alignment::Right => "right",
351            Alignment::Center => "center",
352            Alignment::Justify => "justify",
353        };
354        parts.push(format!("text-align: {}", value));
355    }
356    if let Some(n) = block.indent
357        && n > 0
358    {
359        parts.push(format!("margin-left: {}em", n));
360    }
361    if let Some(px) = block.text_indent
362        && px != 0
363    {
364        parts.push(format!("text-indent: {}px", px));
365    }
366    if let Some(px) = block.top_margin {
367        parts.push(format!("margin-top: {}px", px));
368    }
369    if let Some(px) = block.bottom_margin {
370        parts.push(format!("margin-bottom: {}px", px));
371    }
372    if let Some(px) = block.left_margin {
373        parts.push(format!("margin-left: {}px", px));
374    }
375    if let Some(px) = block.right_margin {
376        parts.push(format!("margin-right: {}px", px));
377    }
378    parts.join("; ")
379}
380
381fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
382    for elem in elements {
383        let text = match &elem.content {
384            InlineContent::Text(t) => escape_html(t),
385            InlineContent::Image {
386                name,
387                width,
388                height,
389                ..
390            } => {
391                format!(
392                    "<img src=\"{}\" width=\"{}\" height=\"{}\">",
393                    escape_html(name),
394                    width,
395                    height
396                )
397            }
398            InlineContent::Empty => String::new(),
399        };
400
401        let is_monospace = elem
402            .fmt_font_family
403            .as_deref()
404            .is_some_and(|f| f == "monospace");
405        let is_bold = elem.fmt_font_bold.unwrap_or(false);
406        let is_italic = elem.fmt_font_italic.unwrap_or(false);
407        let is_underline = elem.fmt_font_underline.unwrap_or(false);
408        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
409        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
410
411        let mut result = text;
412
413        if is_monospace {
414            result = format!("<code>{}</code>", result);
415        }
416        if is_bold {
417            result = format!("<strong>{}</strong>", result);
418        }
419        if is_italic {
420            result = format!("<em>{}</em>", result);
421        }
422        if is_underline {
423            result = format!("<u>{}</u>", result);
424        }
425        if is_strikeout {
426            result = format!("<s>{}</s>", result);
427        }
428        if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
429            result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
430        }
431
432        out.push_str(&result);
433    }
434}
435
436// ── Markdown helpers ────────────────────────────────────────────
437
/// Backslash-escape Markdown punctuation in `s`.
///
/// A `\` is inserted before each of
/// `` \ ` * _ { } [ ] ( ) # + - . ! | ~ < > ``; all other characters are
/// copied through unchanged.
fn escape_markdown(s: &str) -> String {
    const SPECIAL: &str = "\\`*_{}[]()#+-.!|~<>";

    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if SPECIAL.contains(ch) {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
468
469fn render_inline_markdown(elements: &[FragmentElement]) -> String {
470    let mut out = String::new();
471    for elem in elements {
472        let raw_text = match &elem.content {
473            InlineContent::Text(t) => t.clone(),
474            InlineContent::Image { name, .. } => format!("![{}]({})", name, name),
475            InlineContent::Empty => String::new(),
476        };
477
478        let is_monospace = elem
479            .fmt_font_family
480            .as_deref()
481            .is_some_and(|f| f == "monospace");
482        let is_bold = elem.fmt_font_bold.unwrap_or(false);
483        let is_italic = elem.fmt_font_italic.unwrap_or(false);
484        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
485        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
486
487        if is_monospace {
488            out.push('`');
489            out.push_str(&raw_text);
490            out.push('`');
491        } else {
492            let mut text = escape_markdown(&raw_text);
493            if is_bold && is_italic {
494                text = format!("***{}***", text);
495            } else if is_bold {
496                text = format!("**{}**", text);
497            } else if is_italic {
498                text = format!("*{}*", text);
499            }
500            if is_strikeout {
501                text = format!("~~{}~~", text);
502            }
503            if is_anchor {
504                let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
505                out.push_str(&format!("[{}]({})", text, href));
506            } else {
507                out.push_str(&text);
508            }
509        }
510    }
511    out
512}
513
514// ── Fragment construction from parsed content ───────────────────
515
516/// Convert parsed blocks (from HTML or Markdown parser) into a `DocumentFragment`.
517fn parsed_blocks_to_fragment(
518    parsed: Vec<frontend::common::parser_tools::content_parser::ParsedBlock>,
519) -> DocumentFragment {
520    use frontend::common::parser_tools::fragment_schema::FragmentList;
521
522    let blocks: Vec<FragmentBlock> = parsed
523        .into_iter()
524        .map(|pb| {
525            let elements: Vec<FragmentElement> = pb
526                .spans
527                .iter()
528                .map(|span| {
529                    let content = InlineContent::Text(span.text.clone());
530                    let fmt_font_family = if span.code {
531                        Some("monospace".into())
532                    } else {
533                        None
534                    };
535                    let fmt_font_bold = if span.bold { Some(true) } else { None };
536                    let fmt_font_italic = if span.italic { Some(true) } else { None };
537                    let fmt_font_underline = if span.underline { Some(true) } else { None };
538                    let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
539                    let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
540                        (Some(href.clone()), Some(true))
541                    } else {
542                        (None, None)
543                    };
544
545                    FragmentElement {
546                        content,
547                        fmt_font_family,
548                        fmt_font_point_size: None,
549                        fmt_font_weight: None,
550                        fmt_font_bold,
551                        fmt_font_italic,
552                        fmt_font_underline,
553                        fmt_font_overline: None,
554                        fmt_font_strikeout,
555                        fmt_letter_spacing: None,
556                        fmt_word_spacing: None,
557                        fmt_anchor_href,
558                        fmt_anchor_names: vec![],
559                        fmt_is_anchor,
560                        fmt_tooltip: None,
561                        fmt_underline_style: None,
562                        fmt_vertical_alignment: None,
563                    }
564                })
565                .collect();
566
567            let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
568
569            let list = pb.list_style.map(|style| FragmentList {
570                style,
571                indent: pb.list_indent as i64,
572                prefix: String::new(),
573                suffix: String::new(),
574            });
575
576            FragmentBlock {
577                plain_text,
578                elements,
579                heading_level: pb.heading_level,
580                list,
581                alignment: None,
582                indent: None,
583                text_indent: None,
584                marker: None,
585                top_margin: None,
586                bottom_margin: None,
587                left_margin: None,
588                right_margin: None,
589                tab_positions: vec![],
590                line_height: pb.line_height,
591                non_breakable_lines: pb.non_breakable_lines,
592                direction: pb.direction,
593                background_color: pb.background_color,
594                is_code_block: None,
595                code_language: None,
596            }
597        })
598        .collect();
599
600    let data = serde_json::to_string(&FragmentData {
601        blocks,
602        tables: vec![],
603    })
604    .expect("fragment serialization should not fail");
605
606    let plain_text = parsed_plain_text_from_data(&data);
607
608    DocumentFragment { data, plain_text }
609}
610
611/// Extract plain text from serialized fragment data.
612fn parsed_plain_text_from_data(data: &str) -> String {
613    let fragment_data: FragmentData = match serde_json::from_str(data) {
614        Ok(d) => d,
615        Err(_) => return String::new(),
616    };
617
618    fragment_data
619        .blocks
620        .iter()
621        .map(|b| b.plain_text.as_str())
622        .collect::<Vec<_>>()
623        .join("\n")
624}