1use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::content_parser::{ParsedElement, ParsedSpan};
5use frontend::common::parser_tools::fragment_schema::{
6 FragmentBlock, FragmentData, FragmentElement, FragmentTable, FragmentTableCell,
7};
8
/// A piece of rich-text content held as a serialized payload plus its plain-text form.
///
/// The constructors in this file store a JSON-encoded `FragmentData` in `data`;
/// the `to_html` / `to_markdown` renderers parse it back from that string.
#[derive(Debug, Clone)]
pub struct DocumentFragment {
    /// Serialized fragment payload (empty for a fragment created with `new`).
    data: String,
    /// Plain-text rendering of the fragment; also what `is_empty` inspects.
    plain_text: String,
}
19
20impl DocumentFragment {
21 pub fn new() -> Self {
23 Self {
24 data: String::new(),
25 plain_text: String::new(),
26 }
27 }
28
29 pub fn from_plain_text(text: &str) -> Self {
34 let blocks: Vec<FragmentBlock> = text
35 .split('\n')
36 .map(|line| FragmentBlock {
37 plain_text: line.to_string(),
38 elements: vec![FragmentElement {
39 content: InlineContent::Text(line.to_string()),
40 fmt_font_family: None,
41 fmt_font_point_size: None,
42 fmt_font_weight: None,
43 fmt_font_bold: None,
44 fmt_font_italic: None,
45 fmt_font_underline: None,
46 fmt_font_overline: None,
47 fmt_font_strikeout: None,
48 fmt_letter_spacing: None,
49 fmt_word_spacing: None,
50 fmt_anchor_href: None,
51 fmt_anchor_names: vec![],
52 fmt_is_anchor: None,
53 fmt_tooltip: None,
54 fmt_underline_style: None,
55 fmt_vertical_alignment: None,
56 }],
57 heading_level: None,
58 list: None,
59 alignment: None,
60 indent: None,
61 text_indent: None,
62 marker: None,
63 top_margin: None,
64 bottom_margin: None,
65 left_margin: None,
66 right_margin: None,
67 tab_positions: vec![],
68 line_height: None,
69 non_breakable_lines: None,
70 direction: None,
71 background_color: None,
72 is_code_block: None,
73 code_language: None,
74 })
75 .collect();
76
77 let data = serde_json::to_string(&FragmentData {
78 blocks,
79 tables: vec![],
80 })
81 .expect("fragment serialization should not fail");
82
83 Self {
84 data,
85 plain_text: text.to_string(),
86 }
87 }
88
89 pub fn from_html(html: &str) -> Self {
91 let parsed = frontend::common::parser_tools::content_parser::parse_html_elements(html);
92 parsed_elements_to_fragment(parsed)
93 }
94
95 pub fn from_markdown(markdown: &str) -> Self {
97 let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
98 parsed_elements_to_fragment(parsed)
99 }
100
101 pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
103 let inner = doc.inner.lock();
104 let dto = frontend::document_inspection::ExtractFragmentDto {
108 position: 0,
109 anchor: i64::MAX,
110 };
111 let result =
112 frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
113 Ok(Self::from_raw(result.fragment_data, result.plain_text))
114 }
115
116 pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
118 Self { data, plain_text }
119 }
120
121 pub fn to_plain_text(&self) -> &str {
123 &self.plain_text
124 }
125
126 pub fn to_html(&self) -> String {
128 if self.data.is_empty() {
129 return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
130 }
131
132 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
133 Ok(d) => d,
134 Err(_) => {
135 return String::from(
136 "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
137 );
138 }
139 };
140
141 let mut body = String::new();
142 let blocks = &fragment_data.blocks;
143
144 if blocks.len() == 1 && blocks[0].is_inline_only() && fragment_data.tables.is_empty() {
146 push_inline_html(&mut body, &blocks[0].elements);
147 return format!(
148 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
149 body
150 );
151 }
152
153 let mut sorted_tables: Vec<&FragmentTable> = fragment_data.tables.iter().collect();
155 sorted_tables.sort_by_key(|t| t.block_insert_index);
156 let mut table_cursor = 0;
157
158 let mut i = 0;
159
160 while i < blocks.len() {
161 while table_cursor < sorted_tables.len()
163 && sorted_tables[table_cursor].block_insert_index <= i
164 {
165 push_table_html(&mut body, sorted_tables[table_cursor]);
166 table_cursor += 1;
167 }
168
169 let block = &blocks[i];
170
171 if let Some(ref list) = block.list {
172 let is_ordered = is_ordered_list_style(&list.style);
173 let list_tag = if is_ordered { "ol" } else { "ul" };
174 body.push('<');
175 body.push_str(list_tag);
176 body.push('>');
177
178 while i < blocks.len() {
179 let b = &blocks[i];
180 match &b.list {
181 Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
182 body.push_str("<li>");
183 push_inline_html(&mut body, &b.elements);
184 body.push_str("</li>");
185 i += 1;
186 }
187 _ => break,
188 }
189 }
190
191 body.push_str("</");
192 body.push_str(list_tag);
193 body.push('>');
194 } else if let Some(level) = block.heading_level {
195 let n = level.clamp(1, 6);
196 body.push_str(&format!("<h{}>", n));
197 push_inline_html(&mut body, &block.elements);
198 body.push_str(&format!("</h{}>", n));
199 i += 1;
200 } else {
201 let style = block_style_attr(block);
203 if style.is_empty() {
204 body.push_str("<p>");
205 } else {
206 body.push_str(&format!("<p style=\"{}\">", style));
207 }
208 push_inline_html(&mut body, &block.elements);
209 body.push_str("</p>");
210 i += 1;
211 }
212 }
213
214 while table_cursor < sorted_tables.len() {
216 push_table_html(&mut body, sorted_tables[table_cursor]);
217 table_cursor += 1;
218 }
219
220 format!(
221 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
222 body
223 )
224 }
225
226 pub fn to_markdown(&self) -> String {
228 if self.data.is_empty() {
229 return String::new();
230 }
231
232 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
233 Ok(d) => d,
234 Err(_) => return String::new(),
235 };
236
237 let mut parts: Vec<(String, bool)> = Vec::new();
239 let mut prev_was_list = false;
240 let mut list_counter: u32 = 0;
241
242 let mut sorted_tables: Vec<&FragmentTable> = fragment_data.tables.iter().collect();
244 sorted_tables.sort_by_key(|t| t.block_insert_index);
245 let mut table_cursor = 0;
246
247 for (blk_idx, block) in fragment_data.blocks.iter().enumerate() {
248 while table_cursor < sorted_tables.len()
250 && sorted_tables[table_cursor].block_insert_index <= blk_idx
251 {
252 parts.push((render_table_markdown(sorted_tables[table_cursor]), false));
253 prev_was_list = false;
254 list_counter = 0;
255 table_cursor += 1;
256 }
257
258 let inline_text = render_inline_markdown(&block.elements);
259 let is_list = block.list.is_some();
260
261 let indent_prefix = match block.indent {
262 Some(n) if n > 0 => " ".repeat(n as usize),
263 _ => String::new(),
264 };
265
266 if let Some(level) = block.heading_level {
267 let n = level.clamp(1, 6) as usize;
268 let prefix = "#".repeat(n);
269 parts.push((format!("{} {}", prefix, inline_text), false));
270 prev_was_list = false;
271 list_counter = 0;
272 } else if let Some(ref list) = block.list {
273 let is_ordered = is_ordered_list_style(&list.style);
274 if !prev_was_list {
275 list_counter = 0;
276 }
277 if is_ordered {
278 list_counter += 1;
279 parts.push((
280 format!("{}{}. {}", indent_prefix, list_counter, inline_text),
281 true,
282 ));
283 } else {
284 parts.push((format!("{}- {}", indent_prefix, inline_text), true));
285 }
286 prev_was_list = true;
287 } else {
288 if indent_prefix.is_empty() {
289 parts.push((inline_text, false));
290 } else {
291 parts.push((format!("{}{}", indent_prefix, inline_text), false));
292 }
293 prev_was_list = false;
294 list_counter = 0;
295 }
296
297 if !is_list {
298 prev_was_list = false;
299 }
300 }
301
302 while table_cursor < sorted_tables.len() {
304 parts.push((render_table_markdown(sorted_tables[table_cursor]), false));
305 table_cursor += 1;
306 }
307
308 let mut result = String::new();
310 for (idx, (text, is_list)) in parts.iter().enumerate() {
311 if idx > 0 {
312 let (_, prev_is_list) = &parts[idx - 1];
313 if *prev_is_list && *is_list {
314 result.push('\n');
315 } else {
316 result.push_str("\n\n");
317 }
318 }
319 result.push_str(text);
320 }
321
322 result
323 }
324
325 pub fn is_empty(&self) -> bool {
327 self.plain_text.is_empty()
328 }
329
330 pub(crate) fn raw_data(&self) -> &str {
332 &self.data
333 }
334}
335
336impl Default for DocumentFragment {
337 fn default() -> Self {
338 Self::new()
339 }
340}
341
342fn is_ordered_list_style(style: &ListStyle) -> bool {
347 matches!(
348 style,
349 ListStyle::Decimal
350 | ListStyle::LowerAlpha
351 | ListStyle::UpperAlpha
352 | ListStyle::LowerRoman
353 | ListStyle::UpperRoman
354 )
355}
356
/// Escapes the characters that are significant in HTML text and attribute
/// values (`&`, `<`, `>`, `"`, `'`) as character references.
///
/// All other characters are copied through unchanged.
fn escape_html(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#39;"),
            _ => out.push(c),
        }
    }
    out
}
373
374fn block_style_attr(block: &FragmentBlock) -> String {
376 use crate::Alignment;
377
378 let mut parts = Vec::new();
379 if let Some(ref alignment) = block.alignment {
380 let value = match alignment {
381 Alignment::Left => "left",
382 Alignment::Right => "right",
383 Alignment::Center => "center",
384 Alignment::Justify => "justify",
385 };
386 parts.push(format!("text-align: {}", value));
387 }
388 if let Some(n) = block.indent
389 && n > 0
390 {
391 parts.push(format!("margin-left: {}em", n));
392 }
393 if let Some(px) = block.text_indent
394 && px != 0
395 {
396 parts.push(format!("text-indent: {}px", px));
397 }
398 if let Some(px) = block.top_margin {
399 parts.push(format!("margin-top: {}px", px));
400 }
401 if let Some(px) = block.bottom_margin {
402 parts.push(format!("margin-bottom: {}px", px));
403 }
404 if let Some(px) = block.left_margin {
405 parts.push(format!("margin-left: {}px", px));
406 }
407 if let Some(px) = block.right_margin {
408 parts.push(format!("margin-right: {}px", px));
409 }
410 parts.join("; ")
411}
412
413fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
414 for elem in elements {
415 let text = match &elem.content {
416 InlineContent::Text(t) => escape_html(t),
417 InlineContent::Image {
418 name,
419 width,
420 height,
421 ..
422 } => {
423 format!(
424 "<img src=\"{}\" width=\"{}\" height=\"{}\">",
425 escape_html(name),
426 width,
427 height
428 )
429 }
430 InlineContent::Empty => String::new(),
431 };
432
433 let is_monospace = elem
434 .fmt_font_family
435 .as_deref()
436 .is_some_and(|f| f == "monospace");
437 let is_bold = elem.fmt_font_bold.unwrap_or(false);
438 let is_italic = elem.fmt_font_italic.unwrap_or(false);
439 let is_underline = elem.fmt_font_underline.unwrap_or(false);
440 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
441 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
442
443 let mut result = text;
444
445 if is_monospace {
446 result = format!("<code>{}</code>", result);
447 }
448 if is_bold {
449 result = format!("<strong>{}</strong>", result);
450 }
451 if is_italic {
452 result = format!("<em>{}</em>", result);
453 }
454 if is_underline {
455 result = format!("<u>{}</u>", result);
456 }
457 if is_strikeout {
458 result = format!("<s>{}</s>", result);
459 }
460 if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
461 result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
462 }
463
464 out.push_str(&result);
465 }
466}
467
468fn push_table_html(out: &mut String, table: &FragmentTable) {
470 out.push_str("<table>");
471 for row in 0..table.rows {
472 out.push_str("<tr>");
473 for col in 0..table.columns {
474 if let Some(cell) = table.cells.iter().find(|c| c.row == row && c.column == col) {
475 out.push_str("<td");
476 if cell.row_span > 1 {
477 out.push_str(&format!(" rowspan=\"{}\"", cell.row_span));
478 }
479 if cell.column_span > 1 {
480 out.push_str(&format!(" colspan=\"{}\"", cell.column_span));
481 }
482 out.push('>');
483 for (i, block) in cell.blocks.iter().enumerate() {
484 if i > 0 {
485 out.push_str("<br>");
486 }
487 push_inline_html(out, &block.elements);
488 }
489 out.push_str("</td>");
490 }
491 }
493 out.push_str("</tr>");
494 }
495 out.push_str("</table>");
496}
497
/// Backslash-escapes every Markdown-significant punctuation character in `s`.
///
/// The escaped set is: `\` `` ` `` `*` `_` `{` `}` `[` `]` `(` `)` `#` `+`
/// `-` `.` `!` `|` `~` `<` `>`. All other characters pass through unchanged.
fn escape_markdown(s: &str) -> String {
    const SPECIAL: &str = "\\`*_{}[]()#+-.!|~<>";

    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            if SPECIAL.contains(ch) {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
530
531fn render_inline_markdown(elements: &[FragmentElement]) -> String {
532 let mut out = String::new();
533 for elem in elements {
534 let raw_text = match &elem.content {
535 InlineContent::Text(t) => t.clone(),
536 InlineContent::Image { name, .. } => format!("", name, name),
537 InlineContent::Empty => String::new(),
538 };
539
540 let is_monospace = elem
541 .fmt_font_family
542 .as_deref()
543 .is_some_and(|f| f == "monospace");
544 let is_bold = elem.fmt_font_bold.unwrap_or(false);
545 let is_italic = elem.fmt_font_italic.unwrap_or(false);
546 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
547 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
548
549 if is_monospace {
550 out.push('`');
551 out.push_str(&raw_text);
552 out.push('`');
553 } else {
554 let mut text = escape_markdown(&raw_text);
555 if is_bold && is_italic {
556 text = format!("***{}***", text);
557 } else if is_bold {
558 text = format!("**{}**", text);
559 } else if is_italic {
560 text = format!("*{}*", text);
561 }
562 if is_strikeout {
563 text = format!("~~{}~~", text);
564 }
565 if is_anchor {
566 let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
567 out.push_str(&format!("[{}]({})", text, href));
568 } else {
569 out.push_str(&text);
570 }
571 }
572 }
573 out
574}
575
576fn render_table_markdown(table: &FragmentTable) -> String {
578 let mut rows: Vec<Vec<String>> = vec![vec![String::new(); table.columns]; table.rows];
579
580 for cell in &table.cells {
581 let text: String = cell
582 .blocks
583 .iter()
584 .map(|b| render_inline_markdown(&b.elements))
585 .collect::<Vec<_>>()
586 .join(" ");
587 if cell.row < table.rows && cell.column < table.columns {
588 rows[cell.row][cell.column] = text;
589 }
590 }
591
592 let mut out = String::new();
593 for (i, row) in rows.iter().enumerate() {
594 out.push_str("| ");
595 out.push_str(&row.join(" | "));
596 out.push_str(" |");
597 if i == 0 {
598 out.push('\n');
600 out.push('|');
601 for _ in 0..table.columns {
602 out.push_str(" --- |");
603 }
604 }
605 if i + 1 < rows.len() {
606 out.push('\n');
607 }
608 }
609 out
610}
611
612fn span_to_fragment_element(span: &ParsedSpan) -> FragmentElement {
617 let content = InlineContent::Text(span.text.clone());
618 let fmt_font_family = if span.code {
619 Some("monospace".into())
620 } else {
621 None
622 };
623 let fmt_font_bold = if span.bold { Some(true) } else { None };
624 let fmt_font_italic = if span.italic { Some(true) } else { None };
625 let fmt_font_underline = if span.underline { Some(true) } else { None };
626 let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
627 let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
628 (Some(href.clone()), Some(true))
629 } else {
630 (None, None)
631 };
632
633 FragmentElement {
634 content,
635 fmt_font_family,
636 fmt_font_point_size: None,
637 fmt_font_weight: None,
638 fmt_font_bold,
639 fmt_font_italic,
640 fmt_font_underline,
641 fmt_font_overline: None,
642 fmt_font_strikeout,
643 fmt_letter_spacing: None,
644 fmt_word_spacing: None,
645 fmt_anchor_href,
646 fmt_anchor_names: vec![],
647 fmt_is_anchor,
648 fmt_tooltip: None,
649 fmt_underline_style: None,
650 fmt_vertical_alignment: None,
651 }
652}
653
654fn parsed_elements_to_fragment(parsed: Vec<ParsedElement>) -> DocumentFragment {
657 use frontend::common::parser_tools::fragment_schema::FragmentList;
658
659 let mut blocks: Vec<FragmentBlock> = Vec::new();
660 let mut tables: Vec<FragmentTable> = Vec::new();
661
662 for elem in parsed {
663 match elem {
664 ParsedElement::Block(pb) => {
665 let elements: Vec<FragmentElement> =
666 pb.spans.iter().map(span_to_fragment_element).collect();
667 let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
668 let list = pb.list_style.map(|style| FragmentList {
669 style,
670 indent: pb.list_indent as i64,
671 prefix: String::new(),
672 suffix: String::new(),
673 });
674
675 blocks.push(FragmentBlock {
676 plain_text,
677 elements,
678 heading_level: pb.heading_level,
679 list,
680 alignment: None,
681 indent: None,
682 text_indent: None,
683 marker: None,
684 top_margin: None,
685 bottom_margin: None,
686 left_margin: None,
687 right_margin: None,
688 tab_positions: vec![],
689 line_height: pb.line_height,
690 non_breakable_lines: pb.non_breakable_lines,
691 direction: pb.direction,
692 background_color: pb.background_color,
693 is_code_block: None,
694 code_language: None,
695 });
696 }
697 ParsedElement::Table(pt) => {
698 let block_insert_index = blocks.len();
699 let num_columns = pt.rows.iter().map(|r| r.len()).max().unwrap_or(0);
700 let num_rows = pt.rows.len();
701
702 let mut frag_cells: Vec<FragmentTableCell> = Vec::new();
703 for (row_idx, row) in pt.rows.iter().enumerate() {
704 for (col_idx, cell) in row.iter().enumerate() {
705 let cell_elements: Vec<FragmentElement> =
706 cell.spans.iter().map(span_to_fragment_element).collect();
707 let cell_text: String =
708 cell.spans.iter().map(|s| s.text.as_str()).collect();
709
710 frag_cells.push(FragmentTableCell {
711 row: row_idx,
712 column: col_idx,
713 row_span: 1,
714 column_span: 1,
715 blocks: vec![FragmentBlock {
716 plain_text: cell_text,
717 elements: cell_elements,
718 heading_level: None,
719 list: None,
720 alignment: None,
721 indent: None,
722 text_indent: None,
723 marker: None,
724 top_margin: None,
725 bottom_margin: None,
726 left_margin: None,
727 right_margin: None,
728 tab_positions: vec![],
729 line_height: None,
730 non_breakable_lines: None,
731 direction: None,
732 background_color: None,
733 is_code_block: None,
734 code_language: None,
735 }],
736 fmt_padding: None,
737 fmt_border: None,
738 fmt_vertical_alignment: None,
739 fmt_background_color: None,
740 });
741 }
742 }
743
744 tables.push(FragmentTable {
745 rows: num_rows,
746 columns: num_columns,
747 cells: frag_cells,
748 block_insert_index,
749 fmt_border: None,
750 fmt_cell_spacing: None,
751 fmt_cell_padding: None,
752 fmt_width: None,
753 fmt_alignment: None,
754 column_widths: vec![],
755 });
756 }
757 }
758 }
759
760 let data = serde_json::to_string(&FragmentData { blocks, tables })
761 .expect("fragment serialization should not fail");
762
763 let plain_text = parsed_plain_text_from_data(&data);
764
765 DocumentFragment { data, plain_text }
766}
767
768fn parsed_plain_text_from_data(data: &str) -> String {
770 let fragment_data: FragmentData = match serde_json::from_str(data) {
771 Ok(d) => d,
772 Err(_) => return String::new(),
773 };
774
775 fragment_data
776 .blocks
777 .iter()
778 .map(|b| b.plain_text.as_str())
779 .collect::<Vec<_>>()
780 .join("\n")
781}