1use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::content_parser::{ParsedElement, ParsedSpan};
5use frontend::common::parser_tools::fragment_schema::{
6 FragmentBlock, FragmentData, FragmentElement, FragmentTable, FragmentTableCell,
7};
8
/// A piece of document content kept in two parallel forms.
///
/// `data` holds a JSON string that `to_html` and `to_markdown` parse as
/// `FragmentData`; `plain_text` holds the unformatted text of the same
/// content. Both fields are private and are only set through the
/// constructors on this type.
#[derive(Debug, Clone)]
pub struct DocumentFragment {
    /// Serialized fragment payload; the empty string for a fragment built by `new()`.
    data: String,
    /// Plain-text form of the fragment; `is_empty()` is decided by this field alone.
    plain_text: String,
}
19
20impl DocumentFragment {
21 pub fn new() -> Self {
23 Self {
24 data: String::new(),
25 plain_text: String::new(),
26 }
27 }
28
29 pub fn from_plain_text(text: &str) -> Self {
34 let blocks: Vec<FragmentBlock> = text
35 .split('\n')
36 .map(|line| FragmentBlock {
37 plain_text: line.to_string(),
38 elements: vec![FragmentElement {
39 content: InlineContent::Text(line.to_string()),
40 fmt_font_family: None,
41 fmt_font_point_size: None,
42 fmt_font_weight: None,
43 fmt_font_bold: None,
44 fmt_font_italic: None,
45 fmt_font_underline: None,
46 fmt_font_overline: None,
47 fmt_font_strikeout: None,
48 fmt_letter_spacing: None,
49 fmt_word_spacing: None,
50 fmt_anchor_href: None,
51 fmt_anchor_names: vec![],
52 fmt_is_anchor: None,
53 fmt_tooltip: None,
54 fmt_underline_style: None,
55 fmt_vertical_alignment: None,
56 }],
57 heading_level: None,
58 list: None,
59 alignment: None,
60 indent: None,
61 text_indent: None,
62 marker: None,
63 top_margin: None,
64 bottom_margin: None,
65 left_margin: None,
66 right_margin: None,
67 tab_positions: vec![],
68 line_height: None,
69 non_breakable_lines: None,
70 direction: None,
71 background_color: None,
72 is_code_block: None,
73 code_language: None,
74 })
75 .collect();
76
77 let data = serde_json::to_string(&FragmentData {
78 blocks,
79 tables: vec![],
80 })
81 .expect("fragment serialization should not fail");
82
83 Self {
84 data,
85 plain_text: text.to_string(),
86 }
87 }
88
89 pub fn from_html(html: &str) -> Self {
91 let parsed = frontend::common::parser_tools::content_parser::parse_html_elements(html);
92 parsed_elements_to_fragment(parsed)
93 }
94
95 pub fn from_markdown(markdown: &str) -> Self {
97 let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
98 parsed_elements_to_fragment(parsed)
99 }
100
101 pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
103 let inner = doc.inner.lock();
104 let dto = frontend::document_inspection::ExtractFragmentDto {
108 position: 0,
109 anchor: i64::MAX,
110 };
111 let result =
112 frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
113 Ok(Self::from_raw(result.fragment_data, result.plain_text))
114 }
115
116 pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
118 Self { data, plain_text }
119 }
120
121 pub fn to_plain_text(&self) -> &str {
123 &self.plain_text
124 }
125
126 pub fn to_html(&self) -> String {
128 if self.data.is_empty() {
129 return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
130 }
131
132 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
133 Ok(d) => d,
134 Err(_) => {
135 return String::from(
136 "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
137 );
138 }
139 };
140
141 let mut body = String::new();
142 let blocks = &fragment_data.blocks;
143
144 if blocks.len() == 1 && blocks[0].is_inline_only() && fragment_data.tables.is_empty() {
146 push_inline_html(&mut body, &blocks[0].elements);
147 return format!(
148 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
149 body
150 );
151 }
152
153 let mut sorted_tables: Vec<&FragmentTable> = fragment_data.tables.iter().collect();
155 sorted_tables.sort_by_key(|t| t.block_insert_index);
156 let mut table_cursor = 0;
157
158 let mut i = 0;
159
160 while i < blocks.len() {
161 while table_cursor < sorted_tables.len()
163 && sorted_tables[table_cursor].block_insert_index <= i
164 {
165 push_table_html(&mut body, sorted_tables[table_cursor]);
166 table_cursor += 1;
167 }
168
169 let block = &blocks[i];
170
171 if let Some(ref list) = block.list {
172 let is_ordered = is_ordered_list_style(&list.style);
173 let list_tag = if is_ordered { "ol" } else { "ul" };
174 body.push('<');
175 body.push_str(list_tag);
176 body.push('>');
177
178 while i < blocks.len() {
179 let b = &blocks[i];
180 match &b.list {
181 Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
182 body.push_str("<li>");
183 push_inline_html(&mut body, &b.elements);
184 body.push_str("</li>");
185 i += 1;
186 }
187 _ => break,
188 }
189 }
190
191 body.push_str("</");
192 body.push_str(list_tag);
193 body.push('>');
194 } else if let Some(level) = block.heading_level {
195 let n = level.clamp(1, 6);
196 body.push_str(&format!("<h{}>", n));
197 push_inline_html(&mut body, &block.elements);
198 body.push_str(&format!("</h{}>", n));
199 i += 1;
200 } else {
201 let style = block_style_attr(block);
203 if style.is_empty() {
204 body.push_str("<p>");
205 } else {
206 body.push_str(&format!("<p style=\"{}\">", style));
207 }
208 push_inline_html(&mut body, &block.elements);
209 body.push_str("</p>");
210 i += 1;
211 }
212 }
213
214 while table_cursor < sorted_tables.len() {
216 push_table_html(&mut body, sorted_tables[table_cursor]);
217 table_cursor += 1;
218 }
219
220 format!(
221 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
222 body
223 )
224 }
225
226 pub fn to_markdown(&self) -> String {
228 if self.data.is_empty() {
229 return String::new();
230 }
231
232 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
233 Ok(d) => d,
234 Err(_) => return String::new(),
235 };
236
237 let mut parts: Vec<(String, bool)> = Vec::new();
239 let mut prev_was_list = false;
240 let mut list_counter: u32 = 0;
241
242 let mut sorted_tables: Vec<&FragmentTable> = fragment_data.tables.iter().collect();
244 sorted_tables.sort_by_key(|t| t.block_insert_index);
245 let mut table_cursor = 0;
246
247 for (blk_idx, block) in fragment_data.blocks.iter().enumerate() {
248 while table_cursor < sorted_tables.len()
250 && sorted_tables[table_cursor].block_insert_index <= blk_idx
251 {
252 parts.push((render_table_markdown(sorted_tables[table_cursor]), false));
253 prev_was_list = false;
254 list_counter = 0;
255 table_cursor += 1;
256 }
257
258 let inline_text = render_inline_markdown(&block.elements);
259 let is_list = block.list.is_some();
260
261 let indent_prefix = match block.indent {
262 Some(n) if n > 0 => " ".repeat(n as usize),
263 _ => String::new(),
264 };
265
266 if let Some(level) = block.heading_level {
267 let n = level.clamp(1, 6) as usize;
268 let prefix = "#".repeat(n);
269 parts.push((format!("{} {}", prefix, inline_text), false));
270 prev_was_list = false;
271 list_counter = 0;
272 } else if let Some(ref list) = block.list {
273 let is_ordered = is_ordered_list_style(&list.style);
274 if !prev_was_list {
275 list_counter = 0;
276 }
277 if is_ordered {
278 list_counter += 1;
279 parts.push((
280 format!("{}{}. {}", indent_prefix, list_counter, inline_text),
281 true,
282 ));
283 } else {
284 parts.push((format!("{}- {}", indent_prefix, inline_text), true));
285 }
286 prev_was_list = true;
287 } else {
288 if indent_prefix.is_empty() {
289 parts.push((inline_text, false));
290 } else {
291 parts.push((format!("{}{}", indent_prefix, inline_text), false));
292 }
293 prev_was_list = false;
294 list_counter = 0;
295 }
296
297 if !is_list {
298 prev_was_list = false;
299 }
300 }
301
302 while table_cursor < sorted_tables.len() {
304 parts.push((render_table_markdown(sorted_tables[table_cursor]), false));
305 table_cursor += 1;
306 }
307
308 let mut result = String::new();
310 for (idx, (text, is_list)) in parts.iter().enumerate() {
311 if idx > 0 {
312 let (_, prev_is_list) = &parts[idx - 1];
313 if *prev_is_list && *is_list {
314 result.push('\n');
315 } else {
316 result.push_str("\n\n");
317 }
318 }
319 result.push_str(text);
320 }
321
322 result
323 }
324
325 pub fn is_empty(&self) -> bool {
327 self.plain_text.is_empty()
328 }
329
330 pub(crate) fn raw_data(&self) -> &str {
332 &self.data
333 }
334}
335
336impl Default for DocumentFragment {
337 fn default() -> Self {
338 Self::new()
339 }
340}
341
342fn is_ordered_list_style(style: &ListStyle) -> bool {
347 matches!(
348 style,
349 ListStyle::Decimal
350 | ListStyle::LowerAlpha
351 | ListStyle::UpperAlpha
352 | ListStyle::LowerRoman
353 | ListStyle::UpperRoman
354 )
355}
356
/// Escapes `s` for inclusion in HTML text content and in single- or
/// double-quoted attribute values.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&#39;` respectively. A carriage return is replaced by a
/// single space. Every other character is copied through unchanged.
fn escape_html(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            // Numeric entity rather than `&apos;`, which HTML 4 does not define.
            '\'' => out.push_str("&#39;"),
            '\r' => out.push(' '),
            _ => out.push(c),
        }
    }
    out
}
378
379fn block_style_attr(block: &FragmentBlock) -> String {
381 use crate::Alignment;
382
383 let mut parts = Vec::new();
384 if let Some(ref alignment) = block.alignment {
385 let value = match alignment {
386 Alignment::Left => "left",
387 Alignment::Right => "right",
388 Alignment::Center => "center",
389 Alignment::Justify => "justify",
390 };
391 parts.push(format!("text-align: {}", value));
392 }
393 if let Some(n) = block.indent
394 && n > 0
395 {
396 parts.push(format!("margin-left: {}em", n));
397 }
398 if let Some(px) = block.text_indent
399 && px != 0
400 {
401 parts.push(format!("text-indent: {}px", px));
402 }
403 if let Some(px) = block.top_margin {
404 parts.push(format!("margin-top: {}px", px));
405 }
406 if let Some(px) = block.bottom_margin {
407 parts.push(format!("margin-bottom: {}px", px));
408 }
409 if let Some(px) = block.left_margin {
410 parts.push(format!("margin-left: {}px", px));
411 }
412 if let Some(px) = block.right_margin {
413 parts.push(format!("margin-right: {}px", px));
414 }
415 parts.join("; ")
416}
417
418fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
419 for elem in elements {
420 let text = match &elem.content {
421 InlineContent::Text(t) => escape_html(t),
422 InlineContent::Image {
423 name,
424 width,
425 height,
426 ..
427 } => {
428 format!(
429 "<img src=\"{}\" width=\"{}\" height=\"{}\">",
430 escape_html(name),
431 width,
432 height
433 )
434 }
435 InlineContent::Empty => String::new(),
436 };
437
438 let is_monospace = elem
439 .fmt_font_family
440 .as_deref()
441 .is_some_and(|f| f == "monospace");
442 let is_bold = elem.fmt_font_bold.unwrap_or(false);
443 let is_italic = elem.fmt_font_italic.unwrap_or(false);
444 let is_underline = elem.fmt_font_underline.unwrap_or(false);
445 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
446 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
447
448 let mut result = text;
449
450 if is_monospace {
451 result = format!("<code>{}</code>", result);
452 }
453 if is_bold {
454 result = format!("<strong>{}</strong>", result);
455 }
456 if is_italic {
457 result = format!("<em>{}</em>", result);
458 }
459 if is_underline {
460 result = format!("<u>{}</u>", result);
461 }
462 if is_strikeout {
463 result = format!("<s>{}</s>", result);
464 }
465 if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
466 result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
467 }
468
469 out.push_str(&result);
470 }
471}
472
473fn push_table_html(out: &mut String, table: &FragmentTable) {
475 out.push_str("<table>");
476 for row in 0..table.rows {
477 out.push_str("<tr>");
478 for col in 0..table.columns {
479 if let Some(cell) = table.cells.iter().find(|c| c.row == row && c.column == col) {
480 out.push_str("<td");
481 if cell.row_span > 1 {
482 out.push_str(&format!(" rowspan=\"{}\"", cell.row_span));
483 }
484 if cell.column_span > 1 {
485 out.push_str(&format!(" colspan=\"{}\"", cell.column_span));
486 }
487 out.push('>');
488 for (i, block) in cell.blocks.iter().enumerate() {
489 if i > 0 {
490 out.push_str("<br>");
491 }
492 push_inline_html(out, &block.elements);
493 }
494 out.push_str("</td>");
495 }
496 }
498 out.push_str("</tr>");
499 }
500 out.push_str("</table>");
501}
502
/// Backslash-escapes every Markdown-significant character in `s`.
///
/// The escaped set is: `\`, `` ` ``, `*`, `_`, `{`, `}`, `[`, `]`, `(`, `)`,
/// `#`, `+`, `-`, `.`, `!`, `|`, `~`, `<` and `>`. All other characters are
/// copied through unchanged.
fn escape_markdown(s: &str) -> String {
    const SPECIAL: &str = "\\`*_{}[]()#+-.!|~<>";

    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            if SPECIAL.contains(ch) {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
535
536fn render_inline_markdown(elements: &[FragmentElement]) -> String {
537 let mut out = String::new();
538 for elem in elements {
539 let raw_text = match &elem.content {
540 InlineContent::Text(t) => t.clone(),
541 InlineContent::Image { name, .. } => format!("", name, name),
542 InlineContent::Empty => String::new(),
543 };
544
545 let is_monospace = elem
546 .fmt_font_family
547 .as_deref()
548 .is_some_and(|f| f == "monospace");
549 let is_bold = elem.fmt_font_bold.unwrap_or(false);
550 let is_italic = elem.fmt_font_italic.unwrap_or(false);
551 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
552 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
553
554 if is_monospace {
555 out.push('`');
556 out.push_str(&raw_text);
557 out.push('`');
558 } else {
559 let mut text = escape_markdown(&raw_text);
560 if is_bold && is_italic {
561 text = format!("***{}***", text);
562 } else if is_bold {
563 text = format!("**{}**", text);
564 } else if is_italic {
565 text = format!("*{}*", text);
566 }
567 if is_strikeout {
568 text = format!("~~{}~~", text);
569 }
570 if is_anchor {
571 let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
572 out.push_str(&format!("[{}]({})", text, href));
573 } else {
574 out.push_str(&text);
575 }
576 }
577 }
578 out
579}
580
581fn render_table_markdown(table: &FragmentTable) -> String {
583 let mut rows: Vec<Vec<String>> = vec![vec![String::new(); table.columns]; table.rows];
584
585 for cell in &table.cells {
586 let text: String = cell
587 .blocks
588 .iter()
589 .map(|b| render_inline_markdown(&b.elements))
590 .collect::<Vec<_>>()
591 .join(" ");
592 if cell.row < table.rows && cell.column < table.columns {
593 rows[cell.row][cell.column] = text;
594 }
595 }
596
597 let mut out = String::new();
598 for (i, row) in rows.iter().enumerate() {
599 out.push_str("| ");
600 out.push_str(&row.join(" | "));
601 out.push_str(" |");
602 if i == 0 {
603 out.push('\n');
605 out.push('|');
606 for _ in 0..table.columns {
607 out.push_str(" --- |");
608 }
609 }
610 if i + 1 < rows.len() {
611 out.push('\n');
612 }
613 }
614 out
615}
616
617fn span_to_fragment_element(span: &ParsedSpan) -> FragmentElement {
622 let content = InlineContent::Text(span.text.clone());
623 let fmt_font_family = if span.code {
624 Some("monospace".into())
625 } else {
626 None
627 };
628 let fmt_font_bold = if span.bold { Some(true) } else { None };
629 let fmt_font_italic = if span.italic { Some(true) } else { None };
630 let fmt_font_underline = if span.underline { Some(true) } else { None };
631 let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
632 let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
633 (Some(href.clone()), Some(true))
634 } else {
635 (None, None)
636 };
637
638 FragmentElement {
639 content,
640 fmt_font_family,
641 fmt_font_point_size: None,
642 fmt_font_weight: None,
643 fmt_font_bold,
644 fmt_font_italic,
645 fmt_font_underline,
646 fmt_font_overline: None,
647 fmt_font_strikeout,
648 fmt_letter_spacing: None,
649 fmt_word_spacing: None,
650 fmt_anchor_href,
651 fmt_anchor_names: vec![],
652 fmt_is_anchor,
653 fmt_tooltip: None,
654 fmt_underline_style: None,
655 fmt_vertical_alignment: None,
656 }
657}
658
659fn parsed_elements_to_fragment(parsed: Vec<ParsedElement>) -> DocumentFragment {
662 use frontend::common::parser_tools::fragment_schema::FragmentList;
663
664 let mut blocks: Vec<FragmentBlock> = Vec::new();
665 let mut tables: Vec<FragmentTable> = Vec::new();
666
667 for elem in parsed {
668 match elem {
669 ParsedElement::Block(pb) => {
670 let elements: Vec<FragmentElement> =
671 pb.spans.iter().map(span_to_fragment_element).collect();
672 let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
673 let list = pb.list_style.map(|style| FragmentList {
674 style,
675 indent: pb.list_indent as i64,
676 prefix: String::new(),
677 suffix: String::new(),
678 });
679
680 blocks.push(FragmentBlock {
681 plain_text,
682 elements,
683 heading_level: pb.heading_level,
684 list,
685 alignment: None,
686 indent: None,
687 text_indent: None,
688 marker: None,
689 top_margin: None,
690 bottom_margin: None,
691 left_margin: None,
692 right_margin: None,
693 tab_positions: vec![],
694 line_height: pb.line_height,
695 non_breakable_lines: pb.non_breakable_lines,
696 direction: pb.direction,
697 background_color: pb.background_color,
698 is_code_block: None,
699 code_language: None,
700 });
701 }
702 ParsedElement::Table(pt) => {
703 let block_insert_index = blocks.len();
704 let num_columns = pt.rows.iter().map(|r| r.len()).max().unwrap_or(0);
705 let num_rows = pt.rows.len();
706
707 let mut frag_cells: Vec<FragmentTableCell> = Vec::new();
708 for (row_idx, row) in pt.rows.iter().enumerate() {
709 for (col_idx, cell) in row.iter().enumerate() {
710 let cell_elements: Vec<FragmentElement> =
711 cell.spans.iter().map(span_to_fragment_element).collect();
712 let cell_text: String =
713 cell.spans.iter().map(|s| s.text.as_str()).collect();
714
715 frag_cells.push(FragmentTableCell {
716 row: row_idx,
717 column: col_idx,
718 row_span: 1,
719 column_span: 1,
720 blocks: vec![FragmentBlock {
721 plain_text: cell_text,
722 elements: cell_elements,
723 heading_level: None,
724 list: None,
725 alignment: None,
726 indent: None,
727 text_indent: None,
728 marker: None,
729 top_margin: None,
730 bottom_margin: None,
731 left_margin: None,
732 right_margin: None,
733 tab_positions: vec![],
734 line_height: None,
735 non_breakable_lines: None,
736 direction: None,
737 background_color: None,
738 is_code_block: None,
739 code_language: None,
740 }],
741 fmt_padding: None,
742 fmt_border: None,
743 fmt_vertical_alignment: None,
744 fmt_background_color: None,
745 });
746 }
747 }
748
749 tables.push(FragmentTable {
750 rows: num_rows,
751 columns: num_columns,
752 cells: frag_cells,
753 block_insert_index,
754 fmt_border: None,
755 fmt_cell_spacing: None,
756 fmt_cell_padding: None,
757 fmt_width: None,
758 fmt_alignment: None,
759 column_widths: vec![],
760 });
761 }
762 }
763 }
764
765 let data = serde_json::to_string(&FragmentData { blocks, tables })
766 .expect("fragment serialization should not fail");
767
768 let plain_text = parsed_plain_text_from_data(&data);
769
770 DocumentFragment { data, plain_text }
771}
772
773fn parsed_plain_text_from_data(data: &str) -> String {
775 let fragment_data: FragmentData = match serde_json::from_str(data) {
776 Ok(d) => d,
777 Err(_) => return String::new(),
778 };
779
780 fragment_data
781 .blocks
782 .iter()
783 .map(|b| b.plain_text.as_str())
784 .collect::<Vec<_>>()
785 .join("\n")
786}