1use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::content_parser::{ParsedElement, ParsedSpan};
5use frontend::common::parser_tools::fragment_schema::{
6 FragmentBlock, FragmentData, FragmentElement, FragmentTable, FragmentTableCell,
7};
8
/// A detached piece of rich-text content.
///
/// The fragment keeps two parallel representations of the same content:
/// a serialized payload (`data`) and its unformatted text (`plain_text`).
/// The constructors in this file store JSON-encoded `FragmentData` in `data`.
#[derive(Clone, Debug)]
pub struct DocumentFragment {
    /// Serialized fragment payload; the empty string means "no content".
    data: String,
    /// Unformatted text of the fragment.
    plain_text: String,
}
19
20impl DocumentFragment {
21 pub fn new() -> Self {
23 Self {
24 data: String::new(),
25 plain_text: String::new(),
26 }
27 }
28
29 pub fn from_plain_text(text: &str) -> Self {
34 let blocks: Vec<FragmentBlock> = text
35 .split('\n')
36 .map(|line| FragmentBlock {
37 plain_text: line.to_string(),
38 elements: vec![FragmentElement {
39 content: InlineContent::Text(line.to_string()),
40 fmt_font_family: None,
41 fmt_font_point_size: None,
42 fmt_font_weight: None,
43 fmt_font_bold: None,
44 fmt_font_italic: None,
45 fmt_font_underline: None,
46 fmt_font_overline: None,
47 fmt_font_strikeout: None,
48 fmt_letter_spacing: None,
49 fmt_word_spacing: None,
50 fmt_anchor_href: None,
51 fmt_anchor_names: vec![],
52 fmt_is_anchor: None,
53 fmt_tooltip: None,
54 fmt_underline_style: None,
55 fmt_vertical_alignment: None,
56 }],
57 heading_level: None,
58 list: None,
59 alignment: None,
60 indent: None,
61 text_indent: None,
62 marker: None,
63 top_margin: None,
64 bottom_margin: None,
65 left_margin: None,
66 right_margin: None,
67 tab_positions: vec![],
68 line_height: None,
69 non_breakable_lines: None,
70 direction: None,
71 background_color: None,
72 is_code_block: None,
73 code_language: None,
74 hyphenate: None,
75 language: None,
76 })
77 .collect();
78
79 let data = serde_json::to_string(&FragmentData {
80 blocks,
81 tables: vec![],
82 })
83 .expect("fragment serialization should not fail");
84
85 Self {
86 data,
87 plain_text: text.to_string(),
88 }
89 }
90
91 pub fn from_html(html: &str) -> Self {
93 let parsed = frontend::common::parser_tools::content_parser::parse_html_elements(html);
94 parsed_elements_to_fragment(parsed)
95 }
96
97 pub fn from_markdown(markdown: &str) -> Self {
99 let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
100 parsed_elements_to_fragment(parsed)
101 }
102
103 pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
105 let inner = doc.inner.lock();
106 let dto = frontend::document_inspection::ExtractFragmentDto {
110 position: 0,
111 anchor: i64::MAX,
112 };
113 let result =
114 frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
115 Ok(Self::from_raw(result.fragment_data, result.plain_text))
116 }
117
118 pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
120 Self { data, plain_text }
121 }
122
123 pub fn to_plain_text(&self) -> &str {
125 &self.plain_text
126 }
127
128 pub fn to_html(&self) -> String {
130 if self.data.is_empty() {
131 return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
132 }
133
134 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
135 Ok(d) => d,
136 Err(_) => {
137 return String::from(
138 "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
139 );
140 }
141 };
142
143 let mut body = String::new();
144 let blocks = &fragment_data.blocks;
145
146 if blocks.len() == 1 && blocks[0].is_inline_only() && fragment_data.tables.is_empty() {
148 push_inline_html(&mut body, &blocks[0].elements);
149 return format!(
150 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
151 body
152 );
153 }
154
155 let mut sorted_tables: Vec<&FragmentTable> = fragment_data.tables.iter().collect();
157 sorted_tables.sort_by_key(|t| t.block_insert_index);
158 let mut table_cursor = 0;
159
160 let mut i = 0;
161
162 while i < blocks.len() {
163 while table_cursor < sorted_tables.len()
165 && sorted_tables[table_cursor].block_insert_index <= i
166 {
167 push_table_html(&mut body, sorted_tables[table_cursor]);
168 table_cursor += 1;
169 }
170
171 let block = &blocks[i];
172
173 if let Some(ref list) = block.list {
174 let is_ordered = is_ordered_list_style(&list.style);
175 let list_tag = if is_ordered { "ol" } else { "ul" };
176 body.push('<');
177 body.push_str(list_tag);
178 body.push('>');
179
180 while i < blocks.len() {
181 let b = &blocks[i];
182 match &b.list {
183 Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
184 body.push_str("<li>");
185 push_inline_html(&mut body, &b.elements);
186 body.push_str("</li>");
187 i += 1;
188 }
189 _ => break,
190 }
191 }
192
193 body.push_str("</");
194 body.push_str(list_tag);
195 body.push('>');
196 } else if let Some(level) = block.heading_level {
197 let n = level.clamp(1, 6);
198 body.push_str(&format!("<h{}>", n));
199 push_inline_html(&mut body, &block.elements);
200 body.push_str(&format!("</h{}>", n));
201 i += 1;
202 } else {
203 let style = block_style_attr(block);
205 if style.is_empty() {
206 body.push_str("<p>");
207 } else {
208 body.push_str(&format!("<p style=\"{}\">", style));
209 }
210 push_inline_html(&mut body, &block.elements);
211 body.push_str("</p>");
212 i += 1;
213 }
214 }
215
216 while table_cursor < sorted_tables.len() {
218 push_table_html(&mut body, sorted_tables[table_cursor]);
219 table_cursor += 1;
220 }
221
222 format!(
223 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
224 body
225 )
226 }
227
228 pub fn to_markdown(&self) -> String {
230 if self.data.is_empty() {
231 return String::new();
232 }
233
234 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
235 Ok(d) => d,
236 Err(_) => return String::new(),
237 };
238
239 let mut parts: Vec<(String, bool)> = Vec::new();
241 let mut prev_was_list = false;
242 let mut list_counter: u32 = 0;
243
244 let mut sorted_tables: Vec<&FragmentTable> = fragment_data.tables.iter().collect();
246 sorted_tables.sort_by_key(|t| t.block_insert_index);
247 let mut table_cursor = 0;
248
249 for (blk_idx, block) in fragment_data.blocks.iter().enumerate() {
250 while table_cursor < sorted_tables.len()
252 && sorted_tables[table_cursor].block_insert_index <= blk_idx
253 {
254 parts.push((render_table_markdown(sorted_tables[table_cursor]), false));
255 prev_was_list = false;
256 list_counter = 0;
257 table_cursor += 1;
258 }
259
260 let inline_text = render_inline_markdown(&block.elements);
261 let is_list = block.list.is_some();
262
263 let indent_prefix = match block.indent {
264 Some(n) if n > 0 => " ".repeat(n as usize),
265 _ => String::new(),
266 };
267
268 if let Some(level) = block.heading_level {
269 let n = level.clamp(1, 6) as usize;
270 let prefix = "#".repeat(n);
271 parts.push((format!("{} {}", prefix, inline_text), false));
272 prev_was_list = false;
273 list_counter = 0;
274 } else if let Some(ref list) = block.list {
275 let is_ordered = is_ordered_list_style(&list.style);
276 if !prev_was_list {
277 list_counter = 0;
278 }
279 if is_ordered {
280 list_counter += 1;
281 parts.push((
282 format!("{}{}. {}", indent_prefix, list_counter, inline_text),
283 true,
284 ));
285 } else {
286 parts.push((format!("{}- {}", indent_prefix, inline_text), true));
287 }
288 prev_was_list = true;
289 } else {
290 if indent_prefix.is_empty() {
291 parts.push((inline_text, false));
292 } else {
293 parts.push((format!("{}{}", indent_prefix, inline_text), false));
294 }
295 prev_was_list = false;
296 list_counter = 0;
297 }
298
299 if !is_list {
300 prev_was_list = false;
301 }
302 }
303
304 while table_cursor < sorted_tables.len() {
306 parts.push((render_table_markdown(sorted_tables[table_cursor]), false));
307 table_cursor += 1;
308 }
309
310 let mut result = String::new();
312 for (idx, (text, is_list)) in parts.iter().enumerate() {
313 if idx > 0 {
314 let (_, prev_is_list) = &parts[idx - 1];
315 if *prev_is_list && *is_list {
316 result.push('\n');
317 } else {
318 result.push_str("\n\n");
319 }
320 }
321 result.push_str(text);
322 }
323
324 result
325 }
326
327 pub fn is_empty(&self) -> bool {
329 self.plain_text.is_empty()
330 }
331
332 pub(crate) fn raw_data(&self) -> &str {
334 &self.data
335 }
336}
337
338impl Default for DocumentFragment {
339 fn default() -> Self {
340 Self::new()
341 }
342}
343
344fn is_ordered_list_style(style: &ListStyle) -> bool {
349 matches!(
350 style,
351 ListStyle::Decimal
352 | ListStyle::LowerAlpha
353 | ListStyle::UpperAlpha
354 | ListStyle::LowerRoman
355 | ListStyle::UpperRoman
356 )
357}
358
/// Escapes `s` for safe inclusion in HTML text content and in double- or
/// single-quoted attribute values.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by character references so the
/// text can never open a tag, close an attribute or start an entity.
/// Carriage returns are written as a numeric character reference instead of
/// a raw `\r`. All other characters are copied through unchanged.
fn escape_html(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            // `&` must be handled like every other character, one at a time,
            // so already-produced references are never escaped twice.
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#39;"),
            // NOTE(review): the exact reference used upstream for '\r' could
            // not be recovered; `&#13;` is assumed — confirm.
            '\r' => out.push_str("&#13;"),
            _ => out.push(c),
        }
    }
    out
}
380
381fn block_style_attr(block: &FragmentBlock) -> String {
383 use crate::Alignment;
384
385 let mut parts = Vec::new();
386 if let Some(ref alignment) = block.alignment {
387 let value = match alignment {
388 Alignment::Left => "left",
389 Alignment::Right => "right",
390 Alignment::Center => "center",
391 Alignment::Justify => "justify",
392 };
393 parts.push(format!("text-align: {}", value));
394 }
395 if let Some(n) = block.indent
396 && n > 0
397 {
398 parts.push(format!("margin-left: {}em", n));
399 }
400 if let Some(px) = block.text_indent
401 && px != 0
402 {
403 parts.push(format!("text-indent: {}px", px));
404 }
405 if let Some(px) = block.top_margin {
406 parts.push(format!("margin-top: {}px", px));
407 }
408 if let Some(px) = block.bottom_margin {
409 parts.push(format!("margin-bottom: {}px", px));
410 }
411 if let Some(px) = block.left_margin {
412 parts.push(format!("margin-left: {}px", px));
413 }
414 if let Some(px) = block.right_margin {
415 parts.push(format!("margin-right: {}px", px));
416 }
417 parts.join("; ")
418}
419
420fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
421 for elem in elements {
422 let text = match &elem.content {
423 InlineContent::Text(t) => escape_html(t),
424 InlineContent::Image {
425 name,
426 width,
427 height,
428 ..
429 } => {
430 format!(
431 "<img src=\"{}\" width=\"{}\" height=\"{}\">",
432 escape_html(name),
433 width,
434 height
435 )
436 }
437 InlineContent::Empty => String::new(),
438 };
439
440 let is_monospace = elem
441 .fmt_font_family
442 .as_deref()
443 .is_some_and(|f| f == "monospace");
444 let is_bold = elem.fmt_font_bold.unwrap_or(false);
445 let is_italic = elem.fmt_font_italic.unwrap_or(false);
446 let is_underline = elem.fmt_font_underline.unwrap_or(false);
447 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
448 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
449
450 let mut result = text;
451
452 if is_monospace {
453 result = format!("<code>{}</code>", result);
454 }
455 if is_bold {
456 result = format!("<strong>{}</strong>", result);
457 }
458 if is_italic {
459 result = format!("<em>{}</em>", result);
460 }
461 if is_underline {
462 result = format!("<u>{}</u>", result);
463 }
464 if is_strikeout {
465 result = format!("<s>{}</s>", result);
466 }
467 if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
468 result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
469 }
470
471 out.push_str(&result);
472 }
473}
474
475fn push_table_html(out: &mut String, table: &FragmentTable) {
477 out.push_str("<table>");
478 for row in 0..table.rows {
479 out.push_str("<tr>");
480 for col in 0..table.columns {
481 if let Some(cell) = table.cells.iter().find(|c| c.row == row && c.column == col) {
482 out.push_str("<td");
483 if cell.row_span > 1 {
484 out.push_str(&format!(" rowspan=\"{}\"", cell.row_span));
485 }
486 if cell.column_span > 1 {
487 out.push_str(&format!(" colspan=\"{}\"", cell.column_span));
488 }
489 out.push('>');
490 for (i, block) in cell.blocks.iter().enumerate() {
491 if i > 0 {
492 out.push_str("<br>");
493 }
494 push_inline_html(out, &block.elements);
495 }
496 out.push_str("</td>");
497 }
498 }
500 out.push_str("</tr>");
501 }
502 out.push_str("</table>");
503}
504
/// Backslash-escapes every character of `s` that Markdown may interpret as
/// syntax, so the text renders literally. All other characters are copied
/// through unchanged.
fn escape_markdown(s: &str) -> String {
    /// Characters that receive a leading backslash.
    const SPECIAL: &[char] = &[
        '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!', '|', '~',
        '<', '>',
    ];

    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, c| {
            if SPECIAL.contains(&c) {
                escaped.push('\\');
            }
            escaped.push(c);
            escaped
        })
}
537
538fn render_inline_markdown(elements: &[FragmentElement]) -> String {
539 let mut out = String::new();
540 for elem in elements {
541 let raw_text = match &elem.content {
542 InlineContent::Text(t) => t.clone(),
543 InlineContent::Image { name, .. } => format!("", name, name),
544 InlineContent::Empty => String::new(),
545 };
546
547 let is_monospace = elem
548 .fmt_font_family
549 .as_deref()
550 .is_some_and(|f| f == "monospace");
551 let is_bold = elem.fmt_font_bold.unwrap_or(false);
552 let is_italic = elem.fmt_font_italic.unwrap_or(false);
553 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
554 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
555
556 if is_monospace {
557 out.push('`');
558 out.push_str(&raw_text);
559 out.push('`');
560 } else {
561 let mut text = escape_markdown(&raw_text);
562 if is_bold && is_italic {
563 text = format!("***{}***", text);
564 } else if is_bold {
565 text = format!("**{}**", text);
566 } else if is_italic {
567 text = format!("*{}*", text);
568 }
569 if is_strikeout {
570 text = format!("~~{}~~", text);
571 }
572 if is_anchor {
573 let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
574 out.push_str(&format!("[{}]({})", text, href));
575 } else {
576 out.push_str(&text);
577 }
578 }
579 }
580 out
581}
582
583fn render_table_markdown(table: &FragmentTable) -> String {
585 let mut rows: Vec<Vec<String>> = vec![vec![String::new(); table.columns]; table.rows];
586
587 for cell in &table.cells {
588 let text: String = cell
589 .blocks
590 .iter()
591 .map(|b| render_inline_markdown(&b.elements))
592 .collect::<Vec<_>>()
593 .join(" ");
594 if cell.row < table.rows && cell.column < table.columns {
595 rows[cell.row][cell.column] = text;
596 }
597 }
598
599 let mut out = String::new();
600 for (i, row) in rows.iter().enumerate() {
601 out.push_str("| ");
602 out.push_str(&row.join(" | "));
603 out.push_str(" |");
604 if i == 0 {
605 out.push('\n');
607 out.push('|');
608 for _ in 0..table.columns {
609 out.push_str(" --- |");
610 }
611 }
612 if i + 1 < rows.len() {
613 out.push('\n');
614 }
615 }
616 out
617}
618
619fn span_to_fragment_element(span: &ParsedSpan) -> FragmentElement {
624 let content = InlineContent::Text(span.text.clone());
625 let fmt_font_family = if span.code {
626 Some("monospace".into())
627 } else {
628 None
629 };
630 let fmt_font_bold = if span.bold { Some(true) } else { None };
631 let fmt_font_italic = if span.italic { Some(true) } else { None };
632 let fmt_font_underline = if span.underline { Some(true) } else { None };
633 let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
634 let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
635 (Some(href.clone()), Some(true))
636 } else {
637 (None, None)
638 };
639
640 FragmentElement {
641 content,
642 fmt_font_family,
643 fmt_font_point_size: None,
644 fmt_font_weight: None,
645 fmt_font_bold,
646 fmt_font_italic,
647 fmt_font_underline,
648 fmt_font_overline: None,
649 fmt_font_strikeout,
650 fmt_letter_spacing: None,
651 fmt_word_spacing: None,
652 fmt_anchor_href,
653 fmt_anchor_names: vec![],
654 fmt_is_anchor,
655 fmt_tooltip: None,
656 fmt_underline_style: None,
657 fmt_vertical_alignment: None,
658 }
659}
660
661fn parsed_elements_to_fragment(parsed: Vec<ParsedElement>) -> DocumentFragment {
664 use frontend::common::parser_tools::fragment_schema::FragmentList;
665
666 let mut blocks: Vec<FragmentBlock> = Vec::new();
667 let mut tables: Vec<FragmentTable> = Vec::new();
668
669 for elem in parsed {
670 match elem {
671 ParsedElement::Block(pb) => {
672 let elements: Vec<FragmentElement> =
673 pb.spans.iter().map(span_to_fragment_element).collect();
674 let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
675 let list = pb.list_style.map(|style| FragmentList {
676 style,
677 indent: pb.list_indent as i64,
678 prefix: String::new(),
679 suffix: String::new(),
680 });
681
682 blocks.push(FragmentBlock {
683 plain_text,
684 elements,
685 heading_level: pb.heading_level,
686 list,
687 alignment: None,
688 indent: None,
689 text_indent: None,
690 marker: None,
691 top_margin: None,
692 bottom_margin: None,
693 left_margin: None,
694 right_margin: None,
695 tab_positions: vec![],
696 line_height: pb.line_height,
697 non_breakable_lines: pb.non_breakable_lines,
698 direction: pb.direction,
699 background_color: pb.background_color,
700 is_code_block: None,
701 code_language: None,
702 hyphenate: None,
703 language: None,
704 });
705 }
706 ParsedElement::Table(pt) => {
707 let block_insert_index = blocks.len();
708 let num_columns = pt.rows.iter().map(|r| r.len()).max().unwrap_or(0);
709 let num_rows = pt.rows.len();
710
711 let mut frag_cells: Vec<FragmentTableCell> = Vec::new();
712 for (row_idx, row) in pt.rows.iter().enumerate() {
713 for (col_idx, cell) in row.iter().enumerate() {
714 let cell_elements: Vec<FragmentElement> =
715 cell.spans.iter().map(span_to_fragment_element).collect();
716 let cell_text: String =
717 cell.spans.iter().map(|s| s.text.as_str()).collect();
718
719 frag_cells.push(FragmentTableCell {
720 row: row_idx,
721 column: col_idx,
722 row_span: 1,
723 column_span: 1,
724 blocks: vec![FragmentBlock {
725 plain_text: cell_text,
726 elements: cell_elements,
727 heading_level: None,
728 list: None,
729 alignment: None,
730 indent: None,
731 text_indent: None,
732 marker: None,
733 top_margin: None,
734 bottom_margin: None,
735 left_margin: None,
736 right_margin: None,
737 tab_positions: vec![],
738 line_height: None,
739 non_breakable_lines: None,
740 direction: None,
741 background_color: None,
742 is_code_block: None,
743 code_language: None,
744 hyphenate: None,
745 language: None,
746 }],
747 fmt_padding: None,
748 fmt_border: None,
749 fmt_vertical_alignment: None,
750 fmt_background_color: None,
751 });
752 }
753 }
754
755 tables.push(FragmentTable {
756 rows: num_rows,
757 columns: num_columns,
758 cells: frag_cells,
759 block_insert_index,
760 fmt_border: None,
761 fmt_cell_spacing: None,
762 fmt_cell_padding: None,
763 fmt_width: None,
764 fmt_alignment: None,
765 column_widths: vec![],
766 });
767 }
768 }
769 }
770
771 let data = serde_json::to_string(&FragmentData { blocks, tables })
772 .expect("fragment serialization should not fail");
773
774 let plain_text = parsed_plain_text_from_data(&data);
775
776 DocumentFragment { data, plain_text }
777}
778
779fn parsed_plain_text_from_data(data: &str) -> String {
781 let fragment_data: FragmentData = match serde_json::from_str(data) {
782 Ok(d) => d,
783 Err(_) => return String::new(),
784 };
785
786 fragment_data
787 .blocks
788 .iter()
789 .map(|b| b.plain_text.as_str())
790 .collect::<Vec<_>>()
791 .join("\n")
792}