Skip to main content

text_document/
fragment.rs

1//! DocumentFragment — format-agnostic rich text interchange type.
2
3use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::fragment_schema::{
5    FragmentBlock, FragmentData, FragmentElement,
6};
7
/// Rich text that can be inserted into a [`TextDocument`](crate::TextDocument).
///
/// This is the clipboard/interchange type: it bundles blocks, inline
/// elements, and their formatting in an internal representation that does
/// not depend on any particular import or export format.
#[derive(Debug, Clone)]
pub struct DocumentFragment {
    /// Serialized fragment payload. Empty for a fragment built with
    /// `new()`; the constructors in this file otherwise store the JSON
    /// encoding of a `FragmentData` value here.
    data: String,
    /// Plain-text rendering of the same content.
    plain_text: String,
}
18
19impl DocumentFragment {
20    /// Create an empty fragment.
21    pub fn new() -> Self {
22        Self {
23            data: String::new(),
24            plain_text: String::new(),
25        }
26    }
27
28    /// Create a fragment from plain text.
29    ///
30    /// Builds valid fragment data so the fragment can be inserted via
31    /// [`TextCursor::insert_fragment`](crate::TextCursor::insert_fragment).
32    pub fn from_plain_text(text: &str) -> Self {
33        let blocks: Vec<FragmentBlock> = text
34            .split('\n')
35            .map(|line| FragmentBlock {
36                plain_text: line.to_string(),
37                elements: vec![FragmentElement {
38                    content: InlineContent::Text(line.to_string()),
39                    fmt_font_family: None,
40                    fmt_font_point_size: None,
41                    fmt_font_weight: None,
42                    fmt_font_bold: None,
43                    fmt_font_italic: None,
44                    fmt_font_underline: None,
45                    fmt_font_overline: None,
46                    fmt_font_strikeout: None,
47                    fmt_letter_spacing: None,
48                    fmt_word_spacing: None,
49                    fmt_anchor_href: None,
50                    fmt_anchor_names: vec![],
51                    fmt_is_anchor: None,
52                    fmt_tooltip: None,
53                    fmt_underline_style: None,
54                    fmt_vertical_alignment: None,
55                }],
56                heading_level: None,
57                list: None,
58                alignment: None,
59                indent: None,
60                text_indent: None,
61                marker: None,
62                top_margin: None,
63                bottom_margin: None,
64                left_margin: None,
65                right_margin: None,
66                tab_positions: vec![],
67                line_height: None,
68                non_breakable_lines: None,
69                direction: None,
70                background_color: None,
71                is_code_block: None,
72                code_language: None,
73            })
74            .collect();
75
76        let data = serde_json::to_string(&FragmentData { blocks })
77            .expect("fragment serialization should not fail");
78
79        Self {
80            data,
81            plain_text: text.to_string(),
82        }
83    }
84
85    /// Create a fragment from HTML.
86    pub fn from_html(html: &str) -> Self {
87        let parsed = frontend::common::parser_tools::content_parser::parse_html(html);
88        parsed_blocks_to_fragment(parsed)
89    }
90
91    /// Create a fragment from Markdown.
92    pub fn from_markdown(markdown: &str) -> Self {
93        let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
94        parsed_blocks_to_fragment(parsed)
95    }
96
97    /// Create a fragment from an entire document.
98    pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
99        let inner = doc.inner.lock();
100        let char_count = {
101            let stats =
102                frontend::commands::document_inspection_commands::get_document_stats(&inner.ctx)?;
103            crate::convert::to_usize(stats.character_count)
104        };
105        let dto = frontend::document_inspection::ExtractFragmentDto {
106            position: 0,
107            anchor: crate::convert::to_i64(char_count),
108        };
109        let result =
110            frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
111        Ok(Self::from_raw(result.fragment_data, result.plain_text))
112    }
113
114    /// Create a fragment from the serialized internal format.
115    pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
116        Self { data, plain_text }
117    }
118
119    /// Export the fragment as plain text.
120    pub fn to_plain_text(&self) -> &str {
121        &self.plain_text
122    }
123
124    /// Export the fragment as HTML.
125    pub fn to_html(&self) -> String {
126        if self.data.is_empty() {
127            return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
128        }
129
130        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
131            Ok(d) => d,
132            Err(_) => {
133                return String::from(
134                    "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
135                );
136            }
137        };
138
139        let mut body = String::new();
140        let blocks = &fragment_data.blocks;
141
142        // Single inline-only block: emit inline HTML without block wrapper
143        if blocks.len() == 1 && blocks[0].is_inline_only() {
144            push_inline_html(&mut body, &blocks[0].elements);
145            return format!(
146                "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
147                body
148            );
149        }
150
151        let mut i = 0;
152
153        while i < blocks.len() {
154            let block = &blocks[i];
155
156            if let Some(ref list) = block.list {
157                let is_ordered = is_ordered_list_style(&list.style);
158                let list_tag = if is_ordered { "ol" } else { "ul" };
159                body.push('<');
160                body.push_str(list_tag);
161                body.push('>');
162
163                while i < blocks.len() {
164                    let b = &blocks[i];
165                    match &b.list {
166                        Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
167                            body.push_str("<li>");
168                            push_inline_html(&mut body, &b.elements);
169                            body.push_str("</li>");
170                            i += 1;
171                        }
172                        _ => break,
173                    }
174                }
175
176                body.push_str("</");
177                body.push_str(list_tag);
178                body.push('>');
179            } else if let Some(level) = block.heading_level {
180                let n = level.clamp(1, 6);
181                body.push_str(&format!("<h{}>", n));
182                push_inline_html(&mut body, &block.elements);
183                body.push_str(&format!("</h{}>", n));
184                i += 1;
185            } else {
186                // Emit block-level formatting as inline styles (ISSUE-19)
187                let style = block_style_attr(block);
188                if style.is_empty() {
189                    body.push_str("<p>");
190                } else {
191                    body.push_str(&format!("<p style=\"{}\">", style));
192                }
193                push_inline_html(&mut body, &block.elements);
194                body.push_str("</p>");
195                i += 1;
196            }
197        }
198
199        format!(
200            "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
201            body
202        )
203    }
204
205    /// Export the fragment as Markdown.
206    pub fn to_markdown(&self) -> String {
207        if self.data.is_empty() {
208            return String::new();
209        }
210
211        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
212            Ok(d) => d,
213            Err(_) => return String::new(),
214        };
215
216        let mut parts: Vec<String> = Vec::new();
217        let mut prev_was_list = false;
218        let mut list_counter: u32 = 0;
219
220        for block in &fragment_data.blocks {
221            let inline_text = render_inline_markdown(&block.elements);
222            let is_list = block.list.is_some();
223
224            // Markdown indent prefix from block indent level (ISSUE-19)
225            let indent_prefix = match block.indent {
226                Some(n) if n > 0 => "  ".repeat(n as usize),
227                _ => String::new(),
228            };
229
230            if let Some(level) = block.heading_level {
231                let n = level.clamp(1, 6) as usize;
232                let prefix = "#".repeat(n);
233                parts.push(format!("{} {}", prefix, inline_text));
234                prev_was_list = false;
235                list_counter = 0;
236            } else if let Some(ref list) = block.list {
237                let is_ordered = is_ordered_list_style(&list.style);
238                if !prev_was_list {
239                    list_counter = 0;
240                }
241                if is_ordered {
242                    list_counter += 1;
243                    parts.push(format!(
244                        "{}{}. {}",
245                        indent_prefix, list_counter, inline_text
246                    ));
247                } else {
248                    parts.push(format!("{}- {}", indent_prefix, inline_text));
249                }
250                prev_was_list = true;
251            } else {
252                // Prepend blockquote-style indent for indented paragraphs
253                if indent_prefix.is_empty() {
254                    parts.push(inline_text);
255                } else {
256                    parts.push(format!("{}{}", indent_prefix, inline_text));
257                }
258                prev_was_list = false;
259                list_counter = 0;
260            }
261
262            if !is_list {
263                prev_was_list = false;
264            }
265        }
266
267        // Join: list items with \n, others with \n\n
268        let mut result = String::new();
269        let blocks = &fragment_data.blocks;
270        for (idx, part) in parts.iter().enumerate() {
271            if idx > 0 {
272                let prev_is_list = blocks[idx - 1].list.is_some();
273                let curr_is_list = blocks[idx].list.is_some();
274                if prev_is_list && curr_is_list {
275                    result.push('\n');
276                } else {
277                    result.push_str("\n\n");
278                }
279            }
280            result.push_str(part);
281        }
282
283        result
284    }
285
286    /// Returns true if the fragment contains no text or elements.
287    pub fn is_empty(&self) -> bool {
288        self.plain_text.is_empty()
289    }
290
291    /// Returns the serialized internal representation.
292    pub(crate) fn raw_data(&self) -> &str {
293        &self.data
294    }
295}
296
297impl Default for DocumentFragment {
298    fn default() -> Self {
299        Self::new()
300    }
301}
302
303// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
304// Shared helpers (used by both to_html and to_markdown)
305// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
306
307fn is_ordered_list_style(style: &ListStyle) -> bool {
308    matches!(
309        style,
310        ListStyle::Decimal
311            | ListStyle::LowerAlpha
312            | ListStyle::UpperAlpha
313            | ListStyle::LowerRoman
314            | ListStyle::UpperRoman
315    )
316}
317
318// ── HTML helpers ────────────────────────────────────────────────
319
/// Escapes the characters that are significant in HTML text and attribute
/// values: `&`, `<`, `>`, `"` and `'`. All other characters are copied
/// unchanged.
fn escape_html(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let entity = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            '\'' => "&#x27;",
            other => {
                escaped.push(other);
                continue;
            }
        };
        escaped.push_str(entity);
    }
    escaped
}
334
335/// Build a CSS `style` attribute value from block-level formatting (ISSUE-19).
336fn block_style_attr(block: &FragmentBlock) -> String {
337    use crate::Alignment;
338
339    let mut parts = Vec::new();
340    if let Some(ref alignment) = block.alignment {
341        let value = match alignment {
342            Alignment::Left => "left",
343            Alignment::Right => "right",
344            Alignment::Center => "center",
345            Alignment::Justify => "justify",
346        };
347        parts.push(format!("text-align: {}", value));
348    }
349    if let Some(n) = block.indent
350        && n > 0
351    {
352        parts.push(format!("margin-left: {}em", n));
353    }
354    if let Some(px) = block.text_indent
355        && px != 0
356    {
357        parts.push(format!("text-indent: {}px", px));
358    }
359    if let Some(px) = block.top_margin {
360        parts.push(format!("margin-top: {}px", px));
361    }
362    if let Some(px) = block.bottom_margin {
363        parts.push(format!("margin-bottom: {}px", px));
364    }
365    if let Some(px) = block.left_margin {
366        parts.push(format!("margin-left: {}px", px));
367    }
368    if let Some(px) = block.right_margin {
369        parts.push(format!("margin-right: {}px", px));
370    }
371    parts.join("; ")
372}
373
374fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
375    for elem in elements {
376        let text = match &elem.content {
377            InlineContent::Text(t) => escape_html(t),
378            InlineContent::Image {
379                name,
380                width,
381                height,
382                ..
383            } => {
384                format!(
385                    "<img src=\"{}\" width=\"{}\" height=\"{}\">",
386                    escape_html(name),
387                    width,
388                    height
389                )
390            }
391            InlineContent::Empty => String::new(),
392        };
393
394        let is_monospace = elem
395            .fmt_font_family
396            .as_deref()
397            .is_some_and(|f| f == "monospace");
398        let is_bold = elem.fmt_font_bold.unwrap_or(false);
399        let is_italic = elem.fmt_font_italic.unwrap_or(false);
400        let is_underline = elem.fmt_font_underline.unwrap_or(false);
401        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
402        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
403
404        let mut result = text;
405
406        if is_monospace {
407            result = format!("<code>{}</code>", result);
408        }
409        if is_bold {
410            result = format!("<strong>{}</strong>", result);
411        }
412        if is_italic {
413            result = format!("<em>{}</em>", result);
414        }
415        if is_underline {
416            result = format!("<u>{}</u>", result);
417        }
418        if is_strikeout {
419            result = format!("<s>{}</s>", result);
420        }
421        if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
422            result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
423        }
424
425        out.push_str(&result);
426    }
427}
428
429// ── Markdown helpers ────────────────────────────────────────────
430
/// Backslash-escapes every character that can carry meaning in Markdown
/// (`\`, backtick, `*`, `_`, braces, brackets, parentheses, `#`, `+`, `-`,
/// `.`, `!`, `|`, `~`, `<`, `>`). All other characters are copied unchanged.
fn escape_markdown(s: &str) -> String {
    const SPECIAL: &str = "\\`*_{}[]()#+-.!|~<>";

    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if SPECIAL.contains(ch) {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
461
462fn render_inline_markdown(elements: &[FragmentElement]) -> String {
463    let mut out = String::new();
464    for elem in elements {
465        let raw_text = match &elem.content {
466            InlineContent::Text(t) => t.clone(),
467            InlineContent::Image { name, .. } => format!("![{}]({})", name, name),
468            InlineContent::Empty => String::new(),
469        };
470
471        let is_monospace = elem
472            .fmt_font_family
473            .as_deref()
474            .is_some_and(|f| f == "monospace");
475        let is_bold = elem.fmt_font_bold.unwrap_or(false);
476        let is_italic = elem.fmt_font_italic.unwrap_or(false);
477        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
478        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
479
480        if is_monospace {
481            out.push('`');
482            out.push_str(&raw_text);
483            out.push('`');
484        } else {
485            let mut text = escape_markdown(&raw_text);
486            if is_bold && is_italic {
487                text = format!("***{}***", text);
488            } else if is_bold {
489                text = format!("**{}**", text);
490            } else if is_italic {
491                text = format!("*{}*", text);
492            }
493            if is_strikeout {
494                text = format!("~~{}~~", text);
495            }
496            if is_anchor {
497                let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
498                out.push_str(&format!("[{}]({})", text, href));
499            } else {
500                out.push_str(&text);
501            }
502        }
503    }
504    out
505}
506
507// ── Fragment construction from parsed content ───────────────────
508
509/// Convert parsed blocks (from HTML or Markdown parser) into a `DocumentFragment`.
510fn parsed_blocks_to_fragment(
511    parsed: Vec<frontend::common::parser_tools::content_parser::ParsedBlock>,
512) -> DocumentFragment {
513    use frontend::common::parser_tools::fragment_schema::FragmentList;
514
515    let blocks: Vec<FragmentBlock> = parsed
516        .into_iter()
517        .map(|pb| {
518            let elements: Vec<FragmentElement> = pb
519                .spans
520                .iter()
521                .map(|span| {
522                    let content = InlineContent::Text(span.text.clone());
523                    let fmt_font_family = if span.code {
524                        Some("monospace".into())
525                    } else {
526                        None
527                    };
528                    let fmt_font_bold = if span.bold { Some(true) } else { None };
529                    let fmt_font_italic = if span.italic { Some(true) } else { None };
530                    let fmt_font_underline = if span.underline { Some(true) } else { None };
531                    let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
532                    let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
533                        (Some(href.clone()), Some(true))
534                    } else {
535                        (None, None)
536                    };
537
538                    FragmentElement {
539                        content,
540                        fmt_font_family,
541                        fmt_font_point_size: None,
542                        fmt_font_weight: None,
543                        fmt_font_bold,
544                        fmt_font_italic,
545                        fmt_font_underline,
546                        fmt_font_overline: None,
547                        fmt_font_strikeout,
548                        fmt_letter_spacing: None,
549                        fmt_word_spacing: None,
550                        fmt_anchor_href,
551                        fmt_anchor_names: vec![],
552                        fmt_is_anchor,
553                        fmt_tooltip: None,
554                        fmt_underline_style: None,
555                        fmt_vertical_alignment: None,
556                    }
557                })
558                .collect();
559
560            let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
561
562            let list = pb.list_style.map(|style| FragmentList {
563                style,
564                indent: 0,
565                prefix: String::new(),
566                suffix: String::new(),
567            });
568
569            FragmentBlock {
570                plain_text,
571                elements,
572                heading_level: pb.heading_level,
573                list,
574                alignment: None,
575                indent: None,
576                text_indent: None,
577                marker: None,
578                top_margin: None,
579                bottom_margin: None,
580                left_margin: None,
581                right_margin: None,
582                tab_positions: vec![],
583                line_height: pb.line_height,
584                non_breakable_lines: pb.non_breakable_lines,
585                direction: pb.direction,
586                background_color: pb.background_color,
587                is_code_block: None,
588                code_language: None,
589            }
590        })
591        .collect();
592
593    let data = serde_json::to_string(&FragmentData { blocks })
594        .expect("fragment serialization should not fail");
595
596    let plain_text = parsed_plain_text_from_data(&data);
597
598    DocumentFragment { data, plain_text }
599}
600
601/// Extract plain text from serialized fragment data.
602fn parsed_plain_text_from_data(data: &str) -> String {
603    let fragment_data: FragmentData = match serde_json::from_str(data) {
604        Ok(d) => d,
605        Err(_) => return String::new(),
606    };
607
608    fragment_data
609        .blocks
610        .iter()
611        .map(|b| b.plain_text.as_str())
612        .collect::<Vec<_>>()
613        .join("\n")
614}