Skip to main content

text_document/
fragment.rs

1//! DocumentFragment — format-agnostic rich text interchange type.
2
3use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::fragment_schema::{
5    FragmentBlock, FragmentData, FragmentElement,
6};
7
/// A piece of rich text that can be inserted into a [`TextDocument`](crate::TextDocument).
///
/// `DocumentFragment` is the clipboard/interchange type. It pairs a
/// serialized, format-agnostic description of blocks, inline elements and
/// formatting with a plain-text rendering of the same content.
#[derive(Clone, Debug)]
pub struct DocumentFragment {
    /// Serialized fragment data (JSON-encoded `FragmentData` as written and
    /// read by the constructors and exporters of this type). Empty for a
    /// fragment created with `new()`.
    data: String,
    /// Plain-text rendering of the fragment, stored alongside `data`.
    plain_text: String,
}
18
19impl DocumentFragment {
20    /// Create an empty fragment.
21    pub fn new() -> Self {
22        Self {
23            data: String::new(),
24            plain_text: String::new(),
25        }
26    }
27
28    /// Create a fragment from plain text.
29    ///
30    /// Builds valid fragment data so the fragment can be inserted via
31    /// [`TextCursor::insert_fragment`](crate::TextCursor::insert_fragment).
32    pub fn from_plain_text(text: &str) -> Self {
33        let blocks: Vec<FragmentBlock> = text
34            .split('\n')
35            .map(|line| FragmentBlock {
36                plain_text: line.to_string(),
37                elements: vec![FragmentElement {
38                    content: InlineContent::Text(line.to_string()),
39                    fmt_font_family: None,
40                    fmt_font_point_size: None,
41                    fmt_font_weight: None,
42                    fmt_font_bold: None,
43                    fmt_font_italic: None,
44                    fmt_font_underline: None,
45                    fmt_font_overline: None,
46                    fmt_font_strikeout: None,
47                    fmt_letter_spacing: None,
48                    fmt_word_spacing: None,
49                    fmt_anchor_href: None,
50                    fmt_anchor_names: vec![],
51                    fmt_is_anchor: None,
52                    fmt_tooltip: None,
53                    fmt_underline_style: None,
54                    fmt_vertical_alignment: None,
55                }],
56                heading_level: None,
57                list: None,
58                alignment: None,
59                indent: None,
60                text_indent: None,
61                marker: None,
62                top_margin: None,
63                bottom_margin: None,
64                left_margin: None,
65                right_margin: None,
66                tab_positions: vec![],
67            })
68            .collect();
69
70        let data = serde_json::to_string(&FragmentData { blocks })
71            .expect("fragment serialization should not fail");
72
73        Self {
74            data,
75            plain_text: text.to_string(),
76        }
77    }
78
79    /// Create a fragment from HTML.
80    pub fn from_html(html: &str) -> Self {
81        let parsed = frontend::common::parser_tools::content_parser::parse_html(html);
82        parsed_blocks_to_fragment(parsed)
83    }
84
85    /// Create a fragment from Markdown.
86    pub fn from_markdown(markdown: &str) -> Self {
87        let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
88        parsed_blocks_to_fragment(parsed)
89    }
90
91    /// Create a fragment from an entire document.
92    pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
93        let inner = doc.inner.lock();
94        let char_count = {
95            let stats =
96                frontend::commands::document_inspection_commands::get_document_stats(&inner.ctx)?;
97            crate::convert::to_usize(stats.character_count)
98        };
99        let dto = frontend::document_inspection::ExtractFragmentDto {
100            position: 0,
101            anchor: crate::convert::to_i64(char_count),
102        };
103        let result =
104            frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
105        Ok(Self::from_raw(result.fragment_data, result.plain_text))
106    }
107
108    /// Create a fragment from the serialized internal format.
109    pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
110        Self { data, plain_text }
111    }
112
113    /// Export the fragment as plain text.
114    pub fn to_plain_text(&self) -> &str {
115        &self.plain_text
116    }
117
118    /// Export the fragment as HTML.
119    pub fn to_html(&self) -> String {
120        if self.data.is_empty() {
121            return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
122        }
123
124        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
125            Ok(d) => d,
126            Err(_) => {
127                return String::from(
128                    "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
129                )
130            }
131        };
132
133        let mut body = String::new();
134        let blocks = &fragment_data.blocks;
135        let mut i = 0;
136
137        while i < blocks.len() {
138            let block = &blocks[i];
139
140            if let Some(ref list) = block.list {
141                let is_ordered = is_ordered_list_style(&list.style);
142                let list_tag = if is_ordered { "ol" } else { "ul" };
143                body.push('<');
144                body.push_str(list_tag);
145                body.push('>');
146
147                while i < blocks.len() {
148                    let b = &blocks[i];
149                    match &b.list {
150                        Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
151                            body.push_str("<li>");
152                            push_inline_html(&mut body, &b.elements);
153                            body.push_str("</li>");
154                            i += 1;
155                        }
156                        _ => break,
157                    }
158                }
159
160                body.push_str("</");
161                body.push_str(list_tag);
162                body.push('>');
163            } else if let Some(level) = block.heading_level {
164                let n = level.clamp(1, 6);
165                body.push_str(&format!("<h{}>", n));
166                push_inline_html(&mut body, &block.elements);
167                body.push_str(&format!("</h{}>", n));
168                i += 1;
169            } else {
170                // Emit block-level formatting as inline styles (ISSUE-19)
171                let style = block_style_attr(block);
172                if style.is_empty() {
173                    body.push_str("<p>");
174                } else {
175                    body.push_str(&format!("<p style=\"{}\">", style));
176                }
177                push_inline_html(&mut body, &block.elements);
178                body.push_str("</p>");
179                i += 1;
180            }
181        }
182
183        format!(
184            "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
185            body
186        )
187    }
188
189    /// Export the fragment as Markdown.
190    pub fn to_markdown(&self) -> String {
191        if self.data.is_empty() {
192            return String::new();
193        }
194
195        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
196            Ok(d) => d,
197            Err(_) => return String::new(),
198        };
199
200        let mut parts: Vec<String> = Vec::new();
201        let mut prev_was_list = false;
202        let mut list_counter: u32 = 0;
203
204        for block in &fragment_data.blocks {
205            let inline_text = render_inline_markdown(&block.elements);
206            let is_list = block.list.is_some();
207
208            // Markdown indent prefix from block indent level (ISSUE-19)
209            let indent_prefix = match block.indent {
210                Some(n) if n > 0 => "  ".repeat(n as usize),
211                _ => String::new(),
212            };
213
214            if let Some(level) = block.heading_level {
215                let n = level.clamp(1, 6) as usize;
216                let prefix = "#".repeat(n);
217                parts.push(format!("{} {}", prefix, inline_text));
218                prev_was_list = false;
219                list_counter = 0;
220            } else if let Some(ref list) = block.list {
221                let is_ordered = is_ordered_list_style(&list.style);
222                if !prev_was_list {
223                    list_counter = 0;
224                }
225                if is_ordered {
226                    list_counter += 1;
227                    parts.push(format!("{}{}. {}", indent_prefix, list_counter, inline_text));
228                } else {
229                    parts.push(format!("{}- {}", indent_prefix, inline_text));
230                }
231                prev_was_list = true;
232            } else {
233                // Prepend blockquote-style indent for indented paragraphs
234                if indent_prefix.is_empty() {
235                    parts.push(inline_text);
236                } else {
237                    parts.push(format!("{}{}", indent_prefix, inline_text));
238                }
239                prev_was_list = false;
240                list_counter = 0;
241            }
242
243            if !is_list {
244                prev_was_list = false;
245            }
246        }
247
248        // Join: list items with \n, others with \n\n
249        let mut result = String::new();
250        let blocks = &fragment_data.blocks;
251        for (idx, part) in parts.iter().enumerate() {
252            if idx > 0 {
253                let prev_is_list = blocks[idx - 1].list.is_some();
254                let curr_is_list = blocks[idx].list.is_some();
255                if prev_is_list && curr_is_list {
256                    result.push('\n');
257                } else {
258                    result.push_str("\n\n");
259                }
260            }
261            result.push_str(part);
262        }
263
264        result
265    }
266
267    /// Returns true if the fragment contains no text or elements.
268    pub fn is_empty(&self) -> bool {
269        self.plain_text.is_empty()
270    }
271
272    /// Returns the serialized internal representation.
273    pub(crate) fn raw_data(&self) -> &str {
274        &self.data
275    }
276}
277
278impl Default for DocumentFragment {
279    fn default() -> Self {
280        Self::new()
281    }
282}
283
284// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
285// Shared helpers (used by both to_html and to_markdown)
286// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
287
288fn is_ordered_list_style(style: &ListStyle) -> bool {
289    matches!(
290        style,
291        ListStyle::Decimal
292            | ListStyle::LowerAlpha
293            | ListStyle::UpperAlpha
294            | ListStyle::LowerRoman
295            | ListStyle::UpperRoman
296    )
297}
298
299// ── HTML helpers ────────────────────────────────────────────────
300
/// Replace the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`)
/// with their entity forms; every other character is copied unchanged.
fn escape_html(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            let entity = match ch {
                '&' => "&amp;",
                '<' => "&lt;",
                '>' => "&gt;",
                '"' => "&quot;",
                '\'' => "&#x27;",
                other => {
                    escaped.push(other);
                    return escaped;
                }
            };
            escaped.push_str(entity);
            escaped
        })
}
315
316/// Build a CSS `style` attribute value from block-level formatting (ISSUE-19).
317fn block_style_attr(block: &FragmentBlock) -> String {
318    use crate::Alignment;
319
320    let mut parts = Vec::new();
321    if let Some(ref alignment) = block.alignment {
322        let value = match alignment {
323            Alignment::Left => "left",
324            Alignment::Right => "right",
325            Alignment::Center => "center",
326            Alignment::Justify => "justify",
327        };
328        parts.push(format!("text-align: {}", value));
329    }
330    if let Some(n) = block.indent
331        && n > 0 {
332            parts.push(format!("margin-left: {}em", n));
333        }
334    if let Some(px) = block.text_indent
335        && px != 0 {
336            parts.push(format!("text-indent: {}px", px));
337        }
338    if let Some(px) = block.top_margin {
339        parts.push(format!("margin-top: {}px", px));
340    }
341    if let Some(px) = block.bottom_margin {
342        parts.push(format!("margin-bottom: {}px", px));
343    }
344    if let Some(px) = block.left_margin {
345        parts.push(format!("margin-left: {}px", px));
346    }
347    if let Some(px) = block.right_margin {
348        parts.push(format!("margin-right: {}px", px));
349    }
350    parts.join("; ")
351}
352
353fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
354    for elem in elements {
355        let text = match &elem.content {
356            InlineContent::Text(t) => escape_html(t),
357            InlineContent::Image {
358                name,
359                width,
360                height,
361                ..
362            } => {
363                format!(
364                    "<img src=\"{}\" width=\"{}\" height=\"{}\">",
365                    escape_html(name),
366                    width,
367                    height
368                )
369            }
370            InlineContent::Empty => String::new(),
371        };
372
373        let is_monospace = elem
374            .fmt_font_family
375            .as_deref()
376            .is_some_and(|f| f == "monospace");
377        let is_bold = elem.fmt_font_bold.unwrap_or(false);
378        let is_italic = elem.fmt_font_italic.unwrap_or(false);
379        let is_underline = elem.fmt_font_underline.unwrap_or(false);
380        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
381        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
382
383        let mut result = text;
384
385        if is_monospace {
386            result = format!("<code>{}</code>", result);
387        }
388        if is_bold {
389            result = format!("<strong>{}</strong>", result);
390        }
391        if is_italic {
392            result = format!("<em>{}</em>", result);
393        }
394        if is_underline {
395            result = format!("<u>{}</u>", result);
396        }
397        if is_strikeout {
398            result = format!("<s>{}</s>", result);
399        }
400        if is_anchor
401            && let Some(ref href) = elem.fmt_anchor_href {
402                result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
403            }
404
405        out.push_str(&result);
406    }
407}
408
409// ── Markdown helpers ────────────────────────────────────────────
410
/// Backslash-escape every character that can carry Markdown meaning so the
/// text is rendered literally. All other characters are copied unchanged.
fn escape_markdown(s: &str) -> String {
    // The full set of characters that receive a leading backslash.
    const SPECIAL: &str = "\\`*_{}[]()#+-.!|~<>";

    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if SPECIAL.contains(ch) {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
441
442fn render_inline_markdown(elements: &[FragmentElement]) -> String {
443    let mut out = String::new();
444    for elem in elements {
445        let raw_text = match &elem.content {
446            InlineContent::Text(t) => t.clone(),
447            InlineContent::Image { name, .. } => format!("![{}]({})", name, name),
448            InlineContent::Empty => String::new(),
449        };
450
451        let is_monospace = elem
452            .fmt_font_family
453            .as_deref()
454            .is_some_and(|f| f == "monospace");
455        let is_bold = elem.fmt_font_bold.unwrap_or(false);
456        let is_italic = elem.fmt_font_italic.unwrap_or(false);
457        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
458        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
459
460        if is_monospace {
461            out.push('`');
462            out.push_str(&raw_text);
463            out.push('`');
464        } else {
465            let mut text = escape_markdown(&raw_text);
466            if is_bold && is_italic {
467                text = format!("***{}***", text);
468            } else if is_bold {
469                text = format!("**{}**", text);
470            } else if is_italic {
471                text = format!("*{}*", text);
472            }
473            if is_strikeout {
474                text = format!("~~{}~~", text);
475            }
476            if is_anchor {
477                let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
478                out.push_str(&format!("[{}]({})", text, href));
479            } else {
480                out.push_str(&text);
481            }
482        }
483    }
484    out
485}
486
487// ── Fragment construction from parsed content ───────────────────
488
489/// Convert parsed blocks (from HTML or Markdown parser) into a `DocumentFragment`.
490fn parsed_blocks_to_fragment(
491    parsed: Vec<frontend::common::parser_tools::content_parser::ParsedBlock>,
492) -> DocumentFragment {
493    use frontend::common::parser_tools::fragment_schema::FragmentList;
494
495    let blocks: Vec<FragmentBlock> = parsed
496        .into_iter()
497        .map(|pb| {
498            let elements: Vec<FragmentElement> = pb
499                .spans
500                .iter()
501                .map(|span| {
502                    let content = InlineContent::Text(span.text.clone());
503                    let fmt_font_family = if span.code {
504                        Some("monospace".into())
505                    } else {
506                        None
507                    };
508                    let fmt_font_bold = if span.bold { Some(true) } else { None };
509                    let fmt_font_italic = if span.italic { Some(true) } else { None };
510                    let fmt_font_underline = if span.underline { Some(true) } else { None };
511                    let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
512                    let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
513                        (Some(href.clone()), Some(true))
514                    } else {
515                        (None, None)
516                    };
517
518                    FragmentElement {
519                        content,
520                        fmt_font_family,
521                        fmt_font_point_size: None,
522                        fmt_font_weight: None,
523                        fmt_font_bold,
524                        fmt_font_italic,
525                        fmt_font_underline,
526                        fmt_font_overline: None,
527                        fmt_font_strikeout,
528                        fmt_letter_spacing: None,
529                        fmt_word_spacing: None,
530                        fmt_anchor_href,
531                        fmt_anchor_names: vec![],
532                        fmt_is_anchor,
533                        fmt_tooltip: None,
534                        fmt_underline_style: None,
535                        fmt_vertical_alignment: None,
536                    }
537                })
538                .collect();
539
540            let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
541
542            let list = pb.list_style.map(|style| FragmentList {
543                style,
544                indent: 0,
545                prefix: String::new(),
546                suffix: String::new(),
547            });
548
549            FragmentBlock {
550                plain_text,
551                elements,
552                heading_level: pb.heading_level,
553                list,
554                alignment: None,
555                indent: None,
556                text_indent: None,
557                marker: None,
558                top_margin: None,
559                bottom_margin: None,
560                left_margin: None,
561                right_margin: None,
562                tab_positions: vec![],
563            }
564        })
565        .collect();
566
567    let data = serde_json::to_string(&FragmentData { blocks })
568        .expect("fragment serialization should not fail");
569
570    let plain_text = parsed_plain_text_from_data(&data);
571
572    DocumentFragment { data, plain_text }
573}
574
575/// Extract plain text from serialized fragment data.
576fn parsed_plain_text_from_data(data: &str) -> String {
577    let fragment_data: FragmentData = match serde_json::from_str(data) {
578        Ok(d) => d,
579        Err(_) => return String::new(),
580    };
581
582    fragment_data
583        .blocks
584        .iter()
585        .map(|b| b.plain_text.as_str())
586        .collect::<Vec<_>>()
587        .join("\n")
588}