Skip to main content

text_document/
fragment.rs

1//! DocumentFragment — format-agnostic rich text interchange type.
2
3use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::fragment_schema::{
5    FragmentBlock, FragmentData, FragmentElement,
6};
7
/// A piece of rich text that can be inserted into a [`TextDocument`](crate::TextDocument).
///
/// `DocumentFragment` is the clipboard/interchange type. It carries
/// blocks, inline elements, and formatting in a format-agnostic
/// internal representation.
///
/// Two views of the same content are stored side by side: the serialized
/// block data used for insertion and HTML/Markdown export, and a plain-text
/// rendering returned by `to_plain_text`.
#[derive(Debug, Clone)]
pub struct DocumentFragment {
    // Serialized fragment data. The constructors in this file produce it with
    // `serde_json::to_string(&FragmentData { .. })`; it is the empty string
    // for a fragment created by `new()`.
    data: String,
    // Plain-text rendering of the fragment; `is_empty()` looks only at this field.
    plain_text: String,
}
18
19impl DocumentFragment {
20    /// Create an empty fragment.
21    pub fn new() -> Self {
22        Self {
23            data: String::new(),
24            plain_text: String::new(),
25        }
26    }
27
28    /// Create a fragment from plain text.
29    ///
30    /// Builds valid fragment data so the fragment can be inserted via
31    /// [`TextCursor::insert_fragment`](crate::TextCursor::insert_fragment).
32    pub fn from_plain_text(text: &str) -> Self {
33        let blocks: Vec<FragmentBlock> = text
34            .split('\n')
35            .map(|line| FragmentBlock {
36                plain_text: line.to_string(),
37                elements: vec![FragmentElement {
38                    content: InlineContent::Text(line.to_string()),
39                    fmt_font_family: None,
40                    fmt_font_point_size: None,
41                    fmt_font_weight: None,
42                    fmt_font_bold: None,
43                    fmt_font_italic: None,
44                    fmt_font_underline: None,
45                    fmt_font_overline: None,
46                    fmt_font_strikeout: None,
47                    fmt_letter_spacing: None,
48                    fmt_word_spacing: None,
49                    fmt_anchor_href: None,
50                    fmt_anchor_names: vec![],
51                    fmt_is_anchor: None,
52                    fmt_tooltip: None,
53                    fmt_underline_style: None,
54                    fmt_vertical_alignment: None,
55                }],
56                heading_level: None,
57                list: None,
58                alignment: None,
59                indent: None,
60                text_indent: None,
61                marker: None,
62                top_margin: None,
63                bottom_margin: None,
64                left_margin: None,
65                right_margin: None,
66                tab_positions: vec![],
67                line_height: None,
68                non_breakable_lines: None,
69                direction: None,
70                background_color: None,
71            })
72            .collect();
73
74        let data = serde_json::to_string(&FragmentData { blocks })
75            .expect("fragment serialization should not fail");
76
77        Self {
78            data,
79            plain_text: text.to_string(),
80        }
81    }
82
83    /// Create a fragment from HTML.
84    pub fn from_html(html: &str) -> Self {
85        let parsed = frontend::common::parser_tools::content_parser::parse_html(html);
86        parsed_blocks_to_fragment(parsed)
87    }
88
89    /// Create a fragment from Markdown.
90    pub fn from_markdown(markdown: &str) -> Self {
91        let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
92        parsed_blocks_to_fragment(parsed)
93    }
94
95    /// Create a fragment from an entire document.
96    pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
97        let inner = doc.inner.lock();
98        let char_count = {
99            let stats =
100                frontend::commands::document_inspection_commands::get_document_stats(&inner.ctx)?;
101            crate::convert::to_usize(stats.character_count)
102        };
103        let dto = frontend::document_inspection::ExtractFragmentDto {
104            position: 0,
105            anchor: crate::convert::to_i64(char_count),
106        };
107        let result =
108            frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
109        Ok(Self::from_raw(result.fragment_data, result.plain_text))
110    }
111
112    /// Create a fragment from the serialized internal format.
113    pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
114        Self { data, plain_text }
115    }
116
117    /// Export the fragment as plain text.
118    pub fn to_plain_text(&self) -> &str {
119        &self.plain_text
120    }
121
122    /// Export the fragment as HTML.
123    pub fn to_html(&self) -> String {
124        if self.data.is_empty() {
125            return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
126        }
127
128        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
129            Ok(d) => d,
130            Err(_) => {
131                return String::from(
132                    "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
133                );
134            }
135        };
136
137        let mut body = String::new();
138        let blocks = &fragment_data.blocks;
139
140        // Single inline-only block: emit inline HTML without block wrapper
141        if blocks.len() == 1 && blocks[0].is_inline_only() {
142            push_inline_html(&mut body, &blocks[0].elements);
143            return format!(
144                "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
145                body
146            );
147        }
148
149        let mut i = 0;
150
151        while i < blocks.len() {
152            let block = &blocks[i];
153
154            if let Some(ref list) = block.list {
155                let is_ordered = is_ordered_list_style(&list.style);
156                let list_tag = if is_ordered { "ol" } else { "ul" };
157                body.push('<');
158                body.push_str(list_tag);
159                body.push('>');
160
161                while i < blocks.len() {
162                    let b = &blocks[i];
163                    match &b.list {
164                        Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
165                            body.push_str("<li>");
166                            push_inline_html(&mut body, &b.elements);
167                            body.push_str("</li>");
168                            i += 1;
169                        }
170                        _ => break,
171                    }
172                }
173
174                body.push_str("</");
175                body.push_str(list_tag);
176                body.push('>');
177            } else if let Some(level) = block.heading_level {
178                let n = level.clamp(1, 6);
179                body.push_str(&format!("<h{}>", n));
180                push_inline_html(&mut body, &block.elements);
181                body.push_str(&format!("</h{}>", n));
182                i += 1;
183            } else {
184                // Emit block-level formatting as inline styles (ISSUE-19)
185                let style = block_style_attr(block);
186                if style.is_empty() {
187                    body.push_str("<p>");
188                } else {
189                    body.push_str(&format!("<p style=\"{}\">", style));
190                }
191                push_inline_html(&mut body, &block.elements);
192                body.push_str("</p>");
193                i += 1;
194            }
195        }
196
197        format!(
198            "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
199            body
200        )
201    }
202
203    /// Export the fragment as Markdown.
204    pub fn to_markdown(&self) -> String {
205        if self.data.is_empty() {
206            return String::new();
207        }
208
209        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
210            Ok(d) => d,
211            Err(_) => return String::new(),
212        };
213
214        let mut parts: Vec<String> = Vec::new();
215        let mut prev_was_list = false;
216        let mut list_counter: u32 = 0;
217
218        for block in &fragment_data.blocks {
219            let inline_text = render_inline_markdown(&block.elements);
220            let is_list = block.list.is_some();
221
222            // Markdown indent prefix from block indent level (ISSUE-19)
223            let indent_prefix = match block.indent {
224                Some(n) if n > 0 => "  ".repeat(n as usize),
225                _ => String::new(),
226            };
227
228            if let Some(level) = block.heading_level {
229                let n = level.clamp(1, 6) as usize;
230                let prefix = "#".repeat(n);
231                parts.push(format!("{} {}", prefix, inline_text));
232                prev_was_list = false;
233                list_counter = 0;
234            } else if let Some(ref list) = block.list {
235                let is_ordered = is_ordered_list_style(&list.style);
236                if !prev_was_list {
237                    list_counter = 0;
238                }
239                if is_ordered {
240                    list_counter += 1;
241                    parts.push(format!(
242                        "{}{}. {}",
243                        indent_prefix, list_counter, inline_text
244                    ));
245                } else {
246                    parts.push(format!("{}- {}", indent_prefix, inline_text));
247                }
248                prev_was_list = true;
249            } else {
250                // Prepend blockquote-style indent for indented paragraphs
251                if indent_prefix.is_empty() {
252                    parts.push(inline_text);
253                } else {
254                    parts.push(format!("{}{}", indent_prefix, inline_text));
255                }
256                prev_was_list = false;
257                list_counter = 0;
258            }
259
260            if !is_list {
261                prev_was_list = false;
262            }
263        }
264
265        // Join: list items with \n, others with \n\n
266        let mut result = String::new();
267        let blocks = &fragment_data.blocks;
268        for (idx, part) in parts.iter().enumerate() {
269            if idx > 0 {
270                let prev_is_list = blocks[idx - 1].list.is_some();
271                let curr_is_list = blocks[idx].list.is_some();
272                if prev_is_list && curr_is_list {
273                    result.push('\n');
274                } else {
275                    result.push_str("\n\n");
276                }
277            }
278            result.push_str(part);
279        }
280
281        result
282    }
283
284    /// Returns true if the fragment contains no text or elements.
285    pub fn is_empty(&self) -> bool {
286        self.plain_text.is_empty()
287    }
288
289    /// Returns the serialized internal representation.
290    pub(crate) fn raw_data(&self) -> &str {
291        &self.data
292    }
293}
294
295impl Default for DocumentFragment {
296    fn default() -> Self {
297        Self::new()
298    }
299}
300
301// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
302// Shared helpers (used by both to_html and to_markdown)
303// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
304
305fn is_ordered_list_style(style: &ListStyle) -> bool {
306    matches!(
307        style,
308        ListStyle::Decimal
309            | ListStyle::LowerAlpha
310            | ListStyle::UpperAlpha
311            | ListStyle::LowerRoman
312            | ListStyle::UpperRoman
313    )
314}
315
316// ── HTML helpers ────────────────────────────────────────────────
317
/// Escape `s` for use in HTML text or a double-quoted attribute value.
///
/// Replaces `&`, `<`, `>`, `"` and `'` with `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&#x27;`; every other character is copied unchanged.
fn escape_html(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let entity = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            '\'' => "&#x27;",
            _ => {
                // Not a special character: copy it through as-is.
                escaped.push(ch);
                continue;
            }
        };
        escaped.push_str(entity);
    }
    escaped
}
332
333/// Build a CSS `style` attribute value from block-level formatting (ISSUE-19).
334fn block_style_attr(block: &FragmentBlock) -> String {
335    use crate::Alignment;
336
337    let mut parts = Vec::new();
338    if let Some(ref alignment) = block.alignment {
339        let value = match alignment {
340            Alignment::Left => "left",
341            Alignment::Right => "right",
342            Alignment::Center => "center",
343            Alignment::Justify => "justify",
344        };
345        parts.push(format!("text-align: {}", value));
346    }
347    if let Some(n) = block.indent
348        && n > 0
349    {
350        parts.push(format!("margin-left: {}em", n));
351    }
352    if let Some(px) = block.text_indent
353        && px != 0
354    {
355        parts.push(format!("text-indent: {}px", px));
356    }
357    if let Some(px) = block.top_margin {
358        parts.push(format!("margin-top: {}px", px));
359    }
360    if let Some(px) = block.bottom_margin {
361        parts.push(format!("margin-bottom: {}px", px));
362    }
363    if let Some(px) = block.left_margin {
364        parts.push(format!("margin-left: {}px", px));
365    }
366    if let Some(px) = block.right_margin {
367        parts.push(format!("margin-right: {}px", px));
368    }
369    parts.join("; ")
370}
371
372fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
373    for elem in elements {
374        let text = match &elem.content {
375            InlineContent::Text(t) => escape_html(t),
376            InlineContent::Image {
377                name,
378                width,
379                height,
380                ..
381            } => {
382                format!(
383                    "<img src=\"{}\" width=\"{}\" height=\"{}\">",
384                    escape_html(name),
385                    width,
386                    height
387                )
388            }
389            InlineContent::Empty => String::new(),
390        };
391
392        let is_monospace = elem
393            .fmt_font_family
394            .as_deref()
395            .is_some_and(|f| f == "monospace");
396        let is_bold = elem.fmt_font_bold.unwrap_or(false);
397        let is_italic = elem.fmt_font_italic.unwrap_or(false);
398        let is_underline = elem.fmt_font_underline.unwrap_or(false);
399        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
400        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
401
402        let mut result = text;
403
404        if is_monospace {
405            result = format!("<code>{}</code>", result);
406        }
407        if is_bold {
408            result = format!("<strong>{}</strong>", result);
409        }
410        if is_italic {
411            result = format!("<em>{}</em>", result);
412        }
413        if is_underline {
414            result = format!("<u>{}</u>", result);
415        }
416        if is_strikeout {
417            result = format!("<s>{}</s>", result);
418        }
419        if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
420            result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
421        }
422
423        out.push_str(&result);
424    }
425}
426
427// ── Markdown helpers ────────────────────────────────────────────
428
/// Backslash-escape Markdown punctuation in `s`.
///
/// Every occurrence of one of the characters
/// `` \ ` * _ { } [ ] ( ) # + - . ! | ~ < > `` is prefixed with a backslash,
/// regardless of position; all other characters are copied unchanged.
fn escape_markdown(s: &str) -> String {
    /// Characters that receive a leading backslash.
    const SPECIAL: &str = "\\`*_{}[]()#+-.!|~<>";

    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if SPECIAL.contains(ch) {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
459
460fn render_inline_markdown(elements: &[FragmentElement]) -> String {
461    let mut out = String::new();
462    for elem in elements {
463        let raw_text = match &elem.content {
464            InlineContent::Text(t) => t.clone(),
465            InlineContent::Image { name, .. } => format!("![{}]({})", name, name),
466            InlineContent::Empty => String::new(),
467        };
468
469        let is_monospace = elem
470            .fmt_font_family
471            .as_deref()
472            .is_some_and(|f| f == "monospace");
473        let is_bold = elem.fmt_font_bold.unwrap_or(false);
474        let is_italic = elem.fmt_font_italic.unwrap_or(false);
475        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
476        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
477
478        if is_monospace {
479            out.push('`');
480            out.push_str(&raw_text);
481            out.push('`');
482        } else {
483            let mut text = escape_markdown(&raw_text);
484            if is_bold && is_italic {
485                text = format!("***{}***", text);
486            } else if is_bold {
487                text = format!("**{}**", text);
488            } else if is_italic {
489                text = format!("*{}*", text);
490            }
491            if is_strikeout {
492                text = format!("~~{}~~", text);
493            }
494            if is_anchor {
495                let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
496                out.push_str(&format!("[{}]({})", text, href));
497            } else {
498                out.push_str(&text);
499            }
500        }
501    }
502    out
503}
504
505// ── Fragment construction from parsed content ───────────────────
506
507/// Convert parsed blocks (from HTML or Markdown parser) into a `DocumentFragment`.
508fn parsed_blocks_to_fragment(
509    parsed: Vec<frontend::common::parser_tools::content_parser::ParsedBlock>,
510) -> DocumentFragment {
511    use frontend::common::parser_tools::fragment_schema::FragmentList;
512
513    let blocks: Vec<FragmentBlock> = parsed
514        .into_iter()
515        .map(|pb| {
516            let elements: Vec<FragmentElement> = pb
517                .spans
518                .iter()
519                .map(|span| {
520                    let content = InlineContent::Text(span.text.clone());
521                    let fmt_font_family = if span.code {
522                        Some("monospace".into())
523                    } else {
524                        None
525                    };
526                    let fmt_font_bold = if span.bold { Some(true) } else { None };
527                    let fmt_font_italic = if span.italic { Some(true) } else { None };
528                    let fmt_font_underline = if span.underline { Some(true) } else { None };
529                    let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
530                    let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
531                        (Some(href.clone()), Some(true))
532                    } else {
533                        (None, None)
534                    };
535
536                    FragmentElement {
537                        content,
538                        fmt_font_family,
539                        fmt_font_point_size: None,
540                        fmt_font_weight: None,
541                        fmt_font_bold,
542                        fmt_font_italic,
543                        fmt_font_underline,
544                        fmt_font_overline: None,
545                        fmt_font_strikeout,
546                        fmt_letter_spacing: None,
547                        fmt_word_spacing: None,
548                        fmt_anchor_href,
549                        fmt_anchor_names: vec![],
550                        fmt_is_anchor,
551                        fmt_tooltip: None,
552                        fmt_underline_style: None,
553                        fmt_vertical_alignment: None,
554                    }
555                })
556                .collect();
557
558            let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
559
560            let list = pb.list_style.map(|style| FragmentList {
561                style,
562                indent: 0,
563                prefix: String::new(),
564                suffix: String::new(),
565            });
566
567            FragmentBlock {
568                plain_text,
569                elements,
570                heading_level: pb.heading_level,
571                list,
572                alignment: None,
573                indent: None,
574                text_indent: None,
575                marker: None,
576                top_margin: None,
577                bottom_margin: None,
578                left_margin: None,
579                right_margin: None,
580                tab_positions: vec![],
581                line_height: pb.line_height,
582                non_breakable_lines: pb.non_breakable_lines,
583                direction: pb.direction,
584                background_color: pb.background_color,
585            }
586        })
587        .collect();
588
589    let data = serde_json::to_string(&FragmentData { blocks })
590        .expect("fragment serialization should not fail");
591
592    let plain_text = parsed_plain_text_from_data(&data);
593
594    DocumentFragment { data, plain_text }
595}
596
597/// Extract plain text from serialized fragment data.
598fn parsed_plain_text_from_data(data: &str) -> String {
599    let fragment_data: FragmentData = match serde_json::from_str(data) {
600        Ok(d) => d,
601        Err(_) => return String::new(),
602    };
603
604    fragment_data
605        .blocks
606        .iter()
607        .map(|b| b.plain_text.as_str())
608        .collect::<Vec<_>>()
609        .join("\n")
610}