Skip to main content

text_document/
fragment.rs

1//! DocumentFragment — format-agnostic rich text interchange type.
2
3use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::fragment_schema::{
5    FragmentBlock, FragmentData, FragmentElement,
6};
7
/// Rich text that can be inserted into a [`TextDocument`](crate::TextDocument).
///
/// `DocumentFragment` is the clipboard/interchange type: it pairs a
/// serialized, format-agnostic description of blocks, inline elements and
/// formatting with the plain-text rendering of the same content.
#[derive(Clone, Debug)]
pub struct DocumentFragment {
    // Serialized fragment payload (JSON); empty for a fragment made by `new()`.
    data: String,
    // Plain-text rendering handed out by `to_plain_text()`.
    plain_text: String,
}
18
19impl DocumentFragment {
20    /// Create an empty fragment.
21    pub fn new() -> Self {
22        Self {
23            data: String::new(),
24            plain_text: String::new(),
25        }
26    }
27
28    /// Create a fragment from plain text.
29    ///
30    /// Builds valid fragment data so the fragment can be inserted via
31    /// [`TextCursor::insert_fragment`](crate::TextCursor::insert_fragment).
32    pub fn from_plain_text(text: &str) -> Self {
33        let blocks: Vec<FragmentBlock> = text
34            .split('\n')
35            .map(|line| FragmentBlock {
36                plain_text: line.to_string(),
37                elements: vec![FragmentElement {
38                    content: InlineContent::Text(line.to_string()),
39                    fmt_font_family: None,
40                    fmt_font_point_size: None,
41                    fmt_font_weight: None,
42                    fmt_font_bold: None,
43                    fmt_font_italic: None,
44                    fmt_font_underline: None,
45                    fmt_font_overline: None,
46                    fmt_font_strikeout: None,
47                    fmt_letter_spacing: None,
48                    fmt_word_spacing: None,
49                    fmt_anchor_href: None,
50                    fmt_anchor_names: vec![],
51                    fmt_is_anchor: None,
52                    fmt_tooltip: None,
53                    fmt_underline_style: None,
54                    fmt_vertical_alignment: None,
55                }],
56                heading_level: None,
57                list: None,
58                alignment: None,
59                indent: None,
60                text_indent: None,
61                marker: None,
62                top_margin: None,
63                bottom_margin: None,
64                left_margin: None,
65                right_margin: None,
66                tab_positions: vec![],
67                line_height: None,
68                non_breakable_lines: None,
69                direction: None,
70                background_color: None,
71            })
72            .collect();
73
74        let data = serde_json::to_string(&FragmentData { blocks })
75            .expect("fragment serialization should not fail");
76
77        Self {
78            data,
79            plain_text: text.to_string(),
80        }
81    }
82
83    /// Create a fragment from HTML.
84    pub fn from_html(html: &str) -> Self {
85        let parsed = frontend::common::parser_tools::content_parser::parse_html(html);
86        parsed_blocks_to_fragment(parsed)
87    }
88
89    /// Create a fragment from Markdown.
90    pub fn from_markdown(markdown: &str) -> Self {
91        let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
92        parsed_blocks_to_fragment(parsed)
93    }
94
95    /// Create a fragment from an entire document.
96    pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
97        let inner = doc.inner.lock();
98        let char_count = {
99            let stats =
100                frontend::commands::document_inspection_commands::get_document_stats(&inner.ctx)?;
101            crate::convert::to_usize(stats.character_count)
102        };
103        let dto = frontend::document_inspection::ExtractFragmentDto {
104            position: 0,
105            anchor: crate::convert::to_i64(char_count),
106        };
107        let result =
108            frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
109        Ok(Self::from_raw(result.fragment_data, result.plain_text))
110    }
111
112    /// Create a fragment from the serialized internal format.
113    pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
114        Self { data, plain_text }
115    }
116
117    /// Export the fragment as plain text.
118    pub fn to_plain_text(&self) -> &str {
119        &self.plain_text
120    }
121
122    /// Export the fragment as HTML.
123    pub fn to_html(&self) -> String {
124        if self.data.is_empty() {
125            return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
126        }
127
128        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
129            Ok(d) => d,
130            Err(_) => {
131                return String::from(
132                    "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
133                );
134            }
135        };
136
137        let mut body = String::new();
138        let blocks = &fragment_data.blocks;
139        let mut i = 0;
140
141        while i < blocks.len() {
142            let block = &blocks[i];
143
144            if let Some(ref list) = block.list {
145                let is_ordered = is_ordered_list_style(&list.style);
146                let list_tag = if is_ordered { "ol" } else { "ul" };
147                body.push('<');
148                body.push_str(list_tag);
149                body.push('>');
150
151                while i < blocks.len() {
152                    let b = &blocks[i];
153                    match &b.list {
154                        Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
155                            body.push_str("<li>");
156                            push_inline_html(&mut body, &b.elements);
157                            body.push_str("</li>");
158                            i += 1;
159                        }
160                        _ => break,
161                    }
162                }
163
164                body.push_str("</");
165                body.push_str(list_tag);
166                body.push('>');
167            } else if let Some(level) = block.heading_level {
168                let n = level.clamp(1, 6);
169                body.push_str(&format!("<h{}>", n));
170                push_inline_html(&mut body, &block.elements);
171                body.push_str(&format!("</h{}>", n));
172                i += 1;
173            } else {
174                // Emit block-level formatting as inline styles (ISSUE-19)
175                let style = block_style_attr(block);
176                if style.is_empty() {
177                    body.push_str("<p>");
178                } else {
179                    body.push_str(&format!("<p style=\"{}\">", style));
180                }
181                push_inline_html(&mut body, &block.elements);
182                body.push_str("</p>");
183                i += 1;
184            }
185        }
186
187        format!(
188            "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
189            body
190        )
191    }
192
193    /// Export the fragment as Markdown.
194    pub fn to_markdown(&self) -> String {
195        if self.data.is_empty() {
196            return String::new();
197        }
198
199        let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
200            Ok(d) => d,
201            Err(_) => return String::new(),
202        };
203
204        let mut parts: Vec<String> = Vec::new();
205        let mut prev_was_list = false;
206        let mut list_counter: u32 = 0;
207
208        for block in &fragment_data.blocks {
209            let inline_text = render_inline_markdown(&block.elements);
210            let is_list = block.list.is_some();
211
212            // Markdown indent prefix from block indent level (ISSUE-19)
213            let indent_prefix = match block.indent {
214                Some(n) if n > 0 => "  ".repeat(n as usize),
215                _ => String::new(),
216            };
217
218            if let Some(level) = block.heading_level {
219                let n = level.clamp(1, 6) as usize;
220                let prefix = "#".repeat(n);
221                parts.push(format!("{} {}", prefix, inline_text));
222                prev_was_list = false;
223                list_counter = 0;
224            } else if let Some(ref list) = block.list {
225                let is_ordered = is_ordered_list_style(&list.style);
226                if !prev_was_list {
227                    list_counter = 0;
228                }
229                if is_ordered {
230                    list_counter += 1;
231                    parts.push(format!(
232                        "{}{}. {}",
233                        indent_prefix, list_counter, inline_text
234                    ));
235                } else {
236                    parts.push(format!("{}- {}", indent_prefix, inline_text));
237                }
238                prev_was_list = true;
239            } else {
240                // Prepend blockquote-style indent for indented paragraphs
241                if indent_prefix.is_empty() {
242                    parts.push(inline_text);
243                } else {
244                    parts.push(format!("{}{}", indent_prefix, inline_text));
245                }
246                prev_was_list = false;
247                list_counter = 0;
248            }
249
250            if !is_list {
251                prev_was_list = false;
252            }
253        }
254
255        // Join: list items with \n, others with \n\n
256        let mut result = String::new();
257        let blocks = &fragment_data.blocks;
258        for (idx, part) in parts.iter().enumerate() {
259            if idx > 0 {
260                let prev_is_list = blocks[idx - 1].list.is_some();
261                let curr_is_list = blocks[idx].list.is_some();
262                if prev_is_list && curr_is_list {
263                    result.push('\n');
264                } else {
265                    result.push_str("\n\n");
266                }
267            }
268            result.push_str(part);
269        }
270
271        result
272    }
273
274    /// Returns true if the fragment contains no text or elements.
275    pub fn is_empty(&self) -> bool {
276        self.plain_text.is_empty()
277    }
278
279    /// Returns the serialized internal representation.
280    pub(crate) fn raw_data(&self) -> &str {
281        &self.data
282    }
283}
284
285impl Default for DocumentFragment {
286    fn default() -> Self {
287        Self::new()
288    }
289}
290
291// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
292// Shared helpers (used by both to_html and to_markdown)
293// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
294
295fn is_ordered_list_style(style: &ListStyle) -> bool {
296    matches!(
297        style,
298        ListStyle::Decimal
299            | ListStyle::LowerAlpha
300            | ListStyle::UpperAlpha
301            | ListStyle::LowerRoman
302            | ListStyle::UpperRoman
303    )
304}
305
306// ── HTML helpers ────────────────────────────────────────────────
307
/// Replace the HTML-significant characters `&`, `<`, `>`, `"` and `'` with
/// their entity forms; all other characters are copied through unchanged.
fn escape_html(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let entity = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            '\'' => "&#x27;",
            other => {
                escaped.push(other);
                continue;
            }
        };
        escaped.push_str(entity);
    }
    escaped
}
322
323/// Build a CSS `style` attribute value from block-level formatting (ISSUE-19).
324fn block_style_attr(block: &FragmentBlock) -> String {
325    use crate::Alignment;
326
327    let mut parts = Vec::new();
328    if let Some(ref alignment) = block.alignment {
329        let value = match alignment {
330            Alignment::Left => "left",
331            Alignment::Right => "right",
332            Alignment::Center => "center",
333            Alignment::Justify => "justify",
334        };
335        parts.push(format!("text-align: {}", value));
336    }
337    if let Some(n) = block.indent
338        && n > 0
339    {
340        parts.push(format!("margin-left: {}em", n));
341    }
342    if let Some(px) = block.text_indent
343        && px != 0
344    {
345        parts.push(format!("text-indent: {}px", px));
346    }
347    if let Some(px) = block.top_margin {
348        parts.push(format!("margin-top: {}px", px));
349    }
350    if let Some(px) = block.bottom_margin {
351        parts.push(format!("margin-bottom: {}px", px));
352    }
353    if let Some(px) = block.left_margin {
354        parts.push(format!("margin-left: {}px", px));
355    }
356    if let Some(px) = block.right_margin {
357        parts.push(format!("margin-right: {}px", px));
358    }
359    parts.join("; ")
360}
361
362fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
363    for elem in elements {
364        let text = match &elem.content {
365            InlineContent::Text(t) => escape_html(t),
366            InlineContent::Image {
367                name,
368                width,
369                height,
370                ..
371            } => {
372                format!(
373                    "<img src=\"{}\" width=\"{}\" height=\"{}\">",
374                    escape_html(name),
375                    width,
376                    height
377                )
378            }
379            InlineContent::Empty => String::new(),
380        };
381
382        let is_monospace = elem
383            .fmt_font_family
384            .as_deref()
385            .is_some_and(|f| f == "monospace");
386        let is_bold = elem.fmt_font_bold.unwrap_or(false);
387        let is_italic = elem.fmt_font_italic.unwrap_or(false);
388        let is_underline = elem.fmt_font_underline.unwrap_or(false);
389        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
390        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
391
392        let mut result = text;
393
394        if is_monospace {
395            result = format!("<code>{}</code>", result);
396        }
397        if is_bold {
398            result = format!("<strong>{}</strong>", result);
399        }
400        if is_italic {
401            result = format!("<em>{}</em>", result);
402        }
403        if is_underline {
404            result = format!("<u>{}</u>", result);
405        }
406        if is_strikeout {
407            result = format!("<s>{}</s>", result);
408        }
409        if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
410            result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
411        }
412
413        out.push_str(&result);
414    }
415}
416
417// ── Markdown helpers ────────────────────────────────────────────
418
/// Backslash-escape every character that has Markdown syntax meaning so the
/// text is rendered literally; all other characters pass through unchanged.
fn escape_markdown(s: &str) -> String {
    const SPECIAL: &[char] = &[
        '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!', '|', '~',
        '<', '>',
    ];

    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            if SPECIAL.contains(&ch) {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
449
450fn render_inline_markdown(elements: &[FragmentElement]) -> String {
451    let mut out = String::new();
452    for elem in elements {
453        let raw_text = match &elem.content {
454            InlineContent::Text(t) => t.clone(),
455            InlineContent::Image { name, .. } => format!("![{}]({})", name, name),
456            InlineContent::Empty => String::new(),
457        };
458
459        let is_monospace = elem
460            .fmt_font_family
461            .as_deref()
462            .is_some_and(|f| f == "monospace");
463        let is_bold = elem.fmt_font_bold.unwrap_or(false);
464        let is_italic = elem.fmt_font_italic.unwrap_or(false);
465        let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
466        let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
467
468        if is_monospace {
469            out.push('`');
470            out.push_str(&raw_text);
471            out.push('`');
472        } else {
473            let mut text = escape_markdown(&raw_text);
474            if is_bold && is_italic {
475                text = format!("***{}***", text);
476            } else if is_bold {
477                text = format!("**{}**", text);
478            } else if is_italic {
479                text = format!("*{}*", text);
480            }
481            if is_strikeout {
482                text = format!("~~{}~~", text);
483            }
484            if is_anchor {
485                let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
486                out.push_str(&format!("[{}]({})", text, href));
487            } else {
488                out.push_str(&text);
489            }
490        }
491    }
492    out
493}
494
495// ── Fragment construction from parsed content ───────────────────
496
497/// Convert parsed blocks (from HTML or Markdown parser) into a `DocumentFragment`.
498fn parsed_blocks_to_fragment(
499    parsed: Vec<frontend::common::parser_tools::content_parser::ParsedBlock>,
500) -> DocumentFragment {
501    use frontend::common::parser_tools::fragment_schema::FragmentList;
502
503    let blocks: Vec<FragmentBlock> = parsed
504        .into_iter()
505        .map(|pb| {
506            let elements: Vec<FragmentElement> = pb
507                .spans
508                .iter()
509                .map(|span| {
510                    let content = InlineContent::Text(span.text.clone());
511                    let fmt_font_family = if span.code {
512                        Some("monospace".into())
513                    } else {
514                        None
515                    };
516                    let fmt_font_bold = if span.bold { Some(true) } else { None };
517                    let fmt_font_italic = if span.italic { Some(true) } else { None };
518                    let fmt_font_underline = if span.underline { Some(true) } else { None };
519                    let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
520                    let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
521                        (Some(href.clone()), Some(true))
522                    } else {
523                        (None, None)
524                    };
525
526                    FragmentElement {
527                        content,
528                        fmt_font_family,
529                        fmt_font_point_size: None,
530                        fmt_font_weight: None,
531                        fmt_font_bold,
532                        fmt_font_italic,
533                        fmt_font_underline,
534                        fmt_font_overline: None,
535                        fmt_font_strikeout,
536                        fmt_letter_spacing: None,
537                        fmt_word_spacing: None,
538                        fmt_anchor_href,
539                        fmt_anchor_names: vec![],
540                        fmt_is_anchor,
541                        fmt_tooltip: None,
542                        fmt_underline_style: None,
543                        fmt_vertical_alignment: None,
544                    }
545                })
546                .collect();
547
548            let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
549
550            let list = pb.list_style.map(|style| FragmentList {
551                style,
552                indent: 0,
553                prefix: String::new(),
554                suffix: String::new(),
555            });
556
557            FragmentBlock {
558                plain_text,
559                elements,
560                heading_level: pb.heading_level,
561                list,
562                alignment: None,
563                indent: None,
564                text_indent: None,
565                marker: None,
566                top_margin: None,
567                bottom_margin: None,
568                left_margin: None,
569                right_margin: None,
570                tab_positions: vec![],
571                line_height: pb.line_height,
572                non_breakable_lines: pb.non_breakable_lines,
573                direction: pb.direction,
574                background_color: pb.background_color,
575            }
576        })
577        .collect();
578
579    let data = serde_json::to_string(&FragmentData { blocks })
580        .expect("fragment serialization should not fail");
581
582    let plain_text = parsed_plain_text_from_data(&data);
583
584    DocumentFragment { data, plain_text }
585}
586
587/// Extract plain text from serialized fragment data.
588fn parsed_plain_text_from_data(data: &str) -> String {
589    let fragment_data: FragmentData = match serde_json::from_str(data) {
590        Ok(d) => d,
591        Err(_) => return String::new(),
592    };
593
594    fragment_data
595        .blocks
596        .iter()
597        .map(|b| b.plain_text.as_str())
598        .collect::<Vec<_>>()
599        .join("\n")
600}