1use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::fragment_schema::{
5 FragmentBlock, FragmentData, FragmentElement,
6};
7
/// A piece of document content carried in two parallel forms: a serialized
/// structured payload and its plain-text rendering.
#[derive(Clone, Debug)]
pub struct DocumentFragment {
    /// Serialized fragment payload. The constructors in this file fill it with
    /// JSON text; it is empty for a fragment created by `new`.
    data: String,
    /// Plain-text rendering of the same content.
    plain_text: String,
}
18
19impl DocumentFragment {
20 pub fn new() -> Self {
22 Self {
23 data: String::new(),
24 plain_text: String::new(),
25 }
26 }
27
28 pub fn from_plain_text(text: &str) -> Self {
33 let blocks: Vec<FragmentBlock> = text
34 .split('\n')
35 .map(|line| FragmentBlock {
36 plain_text: line.to_string(),
37 elements: vec![FragmentElement {
38 content: InlineContent::Text(line.to_string()),
39 fmt_font_family: None,
40 fmt_font_point_size: None,
41 fmt_font_weight: None,
42 fmt_font_bold: None,
43 fmt_font_italic: None,
44 fmt_font_underline: None,
45 fmt_font_overline: None,
46 fmt_font_strikeout: None,
47 fmt_letter_spacing: None,
48 fmt_word_spacing: None,
49 fmt_anchor_href: None,
50 fmt_anchor_names: vec![],
51 fmt_is_anchor: None,
52 fmt_tooltip: None,
53 fmt_underline_style: None,
54 fmt_vertical_alignment: None,
55 }],
56 heading_level: None,
57 list: None,
58 alignment: None,
59 indent: None,
60 text_indent: None,
61 marker: None,
62 top_margin: None,
63 bottom_margin: None,
64 left_margin: None,
65 right_margin: None,
66 tab_positions: vec![],
67 line_height: None,
68 non_breakable_lines: None,
69 direction: None,
70 background_color: None,
71 is_code_block: None,
72 code_language: None,
73 })
74 .collect();
75
76 let data = serde_json::to_string(&FragmentData {
77 blocks,
78 tables: vec![],
79 })
80 .expect("fragment serialization should not fail");
81
82 Self {
83 data,
84 plain_text: text.to_string(),
85 }
86 }
87
88 pub fn from_html(html: &str) -> Self {
90 let parsed = frontend::common::parser_tools::content_parser::parse_html(html);
91 parsed_blocks_to_fragment(parsed)
92 }
93
94 pub fn from_markdown(markdown: &str) -> Self {
96 let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
97 let blocks =
98 frontend::common::parser_tools::content_parser::ParsedElement::flatten_to_blocks(
99 parsed,
100 );
101 parsed_blocks_to_fragment(blocks)
102 }
103
104 pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
106 let inner = doc.inner.lock();
107 let char_count = {
108 let stats =
109 frontend::commands::document_inspection_commands::get_document_stats(&inner.ctx)?;
110 crate::convert::to_usize(stats.character_count)
111 };
112 let dto = frontend::document_inspection::ExtractFragmentDto {
113 position: 0,
114 anchor: crate::convert::to_i64(char_count),
115 };
116 let result =
117 frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
118 Ok(Self::from_raw(result.fragment_data, result.plain_text))
119 }
120
121 pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
123 Self { data, plain_text }
124 }
125
126 pub fn to_plain_text(&self) -> &str {
128 &self.plain_text
129 }
130
131 pub fn to_html(&self) -> String {
133 if self.data.is_empty() {
134 return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
135 }
136
137 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
138 Ok(d) => d,
139 Err(_) => {
140 return String::from(
141 "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
142 );
143 }
144 };
145
146 let mut body = String::new();
147 let blocks = &fragment_data.blocks;
148
149 if blocks.len() == 1 && blocks[0].is_inline_only() {
151 push_inline_html(&mut body, &blocks[0].elements);
152 return format!(
153 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
154 body
155 );
156 }
157
158 let mut i = 0;
159
160 while i < blocks.len() {
161 let block = &blocks[i];
162
163 if let Some(ref list) = block.list {
164 let is_ordered = is_ordered_list_style(&list.style);
165 let list_tag = if is_ordered { "ol" } else { "ul" };
166 body.push('<');
167 body.push_str(list_tag);
168 body.push('>');
169
170 while i < blocks.len() {
171 let b = &blocks[i];
172 match &b.list {
173 Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
174 body.push_str("<li>");
175 push_inline_html(&mut body, &b.elements);
176 body.push_str("</li>");
177 i += 1;
178 }
179 _ => break,
180 }
181 }
182
183 body.push_str("</");
184 body.push_str(list_tag);
185 body.push('>');
186 } else if let Some(level) = block.heading_level {
187 let n = level.clamp(1, 6);
188 body.push_str(&format!("<h{}>", n));
189 push_inline_html(&mut body, &block.elements);
190 body.push_str(&format!("</h{}>", n));
191 i += 1;
192 } else {
193 let style = block_style_attr(block);
195 if style.is_empty() {
196 body.push_str("<p>");
197 } else {
198 body.push_str(&format!("<p style=\"{}\">", style));
199 }
200 push_inline_html(&mut body, &block.elements);
201 body.push_str("</p>");
202 i += 1;
203 }
204 }
205
206 format!(
207 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
208 body
209 )
210 }
211
212 pub fn to_markdown(&self) -> String {
214 if self.data.is_empty() {
215 return String::new();
216 }
217
218 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
219 Ok(d) => d,
220 Err(_) => return String::new(),
221 };
222
223 let mut parts: Vec<String> = Vec::new();
224 let mut prev_was_list = false;
225 let mut list_counter: u32 = 0;
226
227 for block in &fragment_data.blocks {
228 let inline_text = render_inline_markdown(&block.elements);
229 let is_list = block.list.is_some();
230
231 let indent_prefix = match block.indent {
233 Some(n) if n > 0 => " ".repeat(n as usize),
234 _ => String::new(),
235 };
236
237 if let Some(level) = block.heading_level {
238 let n = level.clamp(1, 6) as usize;
239 let prefix = "#".repeat(n);
240 parts.push(format!("{} {}", prefix, inline_text));
241 prev_was_list = false;
242 list_counter = 0;
243 } else if let Some(ref list) = block.list {
244 let is_ordered = is_ordered_list_style(&list.style);
245 if !prev_was_list {
246 list_counter = 0;
247 }
248 if is_ordered {
249 list_counter += 1;
250 parts.push(format!(
251 "{}{}. {}",
252 indent_prefix, list_counter, inline_text
253 ));
254 } else {
255 parts.push(format!("{}- {}", indent_prefix, inline_text));
256 }
257 prev_was_list = true;
258 } else {
259 if indent_prefix.is_empty() {
261 parts.push(inline_text);
262 } else {
263 parts.push(format!("{}{}", indent_prefix, inline_text));
264 }
265 prev_was_list = false;
266 list_counter = 0;
267 }
268
269 if !is_list {
270 prev_was_list = false;
271 }
272 }
273
274 let mut result = String::new();
276 let blocks = &fragment_data.blocks;
277 for (idx, part) in parts.iter().enumerate() {
278 if idx > 0 {
279 let prev_is_list = blocks[idx - 1].list.is_some();
280 let curr_is_list = blocks[idx].list.is_some();
281 if prev_is_list && curr_is_list {
282 result.push('\n');
283 } else {
284 result.push_str("\n\n");
285 }
286 }
287 result.push_str(part);
288 }
289
290 result
291 }
292
293 pub fn is_empty(&self) -> bool {
295 self.plain_text.is_empty()
296 }
297
298 pub(crate) fn raw_data(&self) -> &str {
300 &self.data
301 }
302}
303
304impl Default for DocumentFragment {
305 fn default() -> Self {
306 Self::new()
307 }
308}
309
310fn is_ordered_list_style(style: &ListStyle) -> bool {
315 matches!(
316 style,
317 ListStyle::Decimal
318 | ListStyle::LowerAlpha
319 | ListStyle::UpperAlpha
320 | ListStyle::LowerRoman
321 | ListStyle::UpperRoman
322 )
323}
324
/// Escapes text for safe inclusion in HTML element content and in
/// double- or single-quoted attribute values.
///
/// The five significant characters are replaced by character references:
/// `&` -> `&amp;`, `<` -> `&lt;`, `>` -> `&gt;`, `"` -> `&quot;`,
/// `'` -> `&#39;`. All other characters are copied unchanged.
fn escape_html(s: &str) -> String {
    // The output is at least as long as the input; it grows only when
    // something is actually escaped.
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#39;"),
            _ => out.push(c),
        }
    }
    out
}
341
342fn block_style_attr(block: &FragmentBlock) -> String {
344 use crate::Alignment;
345
346 let mut parts = Vec::new();
347 if let Some(ref alignment) = block.alignment {
348 let value = match alignment {
349 Alignment::Left => "left",
350 Alignment::Right => "right",
351 Alignment::Center => "center",
352 Alignment::Justify => "justify",
353 };
354 parts.push(format!("text-align: {}", value));
355 }
356 if let Some(n) = block.indent
357 && n > 0
358 {
359 parts.push(format!("margin-left: {}em", n));
360 }
361 if let Some(px) = block.text_indent
362 && px != 0
363 {
364 parts.push(format!("text-indent: {}px", px));
365 }
366 if let Some(px) = block.top_margin {
367 parts.push(format!("margin-top: {}px", px));
368 }
369 if let Some(px) = block.bottom_margin {
370 parts.push(format!("margin-bottom: {}px", px));
371 }
372 if let Some(px) = block.left_margin {
373 parts.push(format!("margin-left: {}px", px));
374 }
375 if let Some(px) = block.right_margin {
376 parts.push(format!("margin-right: {}px", px));
377 }
378 parts.join("; ")
379}
380
381fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
382 for elem in elements {
383 let text = match &elem.content {
384 InlineContent::Text(t) => escape_html(t),
385 InlineContent::Image {
386 name,
387 width,
388 height,
389 ..
390 } => {
391 format!(
392 "<img src=\"{}\" width=\"{}\" height=\"{}\">",
393 escape_html(name),
394 width,
395 height
396 )
397 }
398 InlineContent::Empty => String::new(),
399 };
400
401 let is_monospace = elem
402 .fmt_font_family
403 .as_deref()
404 .is_some_and(|f| f == "monospace");
405 let is_bold = elem.fmt_font_bold.unwrap_or(false);
406 let is_italic = elem.fmt_font_italic.unwrap_or(false);
407 let is_underline = elem.fmt_font_underline.unwrap_or(false);
408 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
409 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
410
411 let mut result = text;
412
413 if is_monospace {
414 result = format!("<code>{}</code>", result);
415 }
416 if is_bold {
417 result = format!("<strong>{}</strong>", result);
418 }
419 if is_italic {
420 result = format!("<em>{}</em>", result);
421 }
422 if is_underline {
423 result = format!("<u>{}</u>", result);
424 }
425 if is_strikeout {
426 result = format!("<s>{}</s>", result);
427 }
428 if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
429 result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
430 }
431
432 out.push_str(&result);
433 }
434}
435
/// Backslash-escapes every character that can carry Markdown meaning.
///
/// The escaped set is: `` \ ` * _ { } [ ] ( ) # + - . ! | ~ < > ``. All other
/// characters are copied unchanged.
fn escape_markdown(s: &str) -> String {
    /// Characters that receive a leading backslash.
    const SPECIAL: &str = "\\`*_{}[]()#+-.!|~<>";

    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if SPECIAL.contains(ch) {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
468
469fn render_inline_markdown(elements: &[FragmentElement]) -> String {
470 let mut out = String::new();
471 for elem in elements {
472 let raw_text = match &elem.content {
473 InlineContent::Text(t) => t.clone(),
474 InlineContent::Image { name, .. } => format!("", name, name),
475 InlineContent::Empty => String::new(),
476 };
477
478 let is_monospace = elem
479 .fmt_font_family
480 .as_deref()
481 .is_some_and(|f| f == "monospace");
482 let is_bold = elem.fmt_font_bold.unwrap_or(false);
483 let is_italic = elem.fmt_font_italic.unwrap_or(false);
484 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
485 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
486
487 if is_monospace {
488 out.push('`');
489 out.push_str(&raw_text);
490 out.push('`');
491 } else {
492 let mut text = escape_markdown(&raw_text);
493 if is_bold && is_italic {
494 text = format!("***{}***", text);
495 } else if is_bold {
496 text = format!("**{}**", text);
497 } else if is_italic {
498 text = format!("*{}*", text);
499 }
500 if is_strikeout {
501 text = format!("~~{}~~", text);
502 }
503 if is_anchor {
504 let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
505 out.push_str(&format!("[{}]({})", text, href));
506 } else {
507 out.push_str(&text);
508 }
509 }
510 }
511 out
512}
513
514fn parsed_blocks_to_fragment(
518 parsed: Vec<frontend::common::parser_tools::content_parser::ParsedBlock>,
519) -> DocumentFragment {
520 use frontend::common::parser_tools::fragment_schema::FragmentList;
521
522 let blocks: Vec<FragmentBlock> = parsed
523 .into_iter()
524 .map(|pb| {
525 let elements: Vec<FragmentElement> = pb
526 .spans
527 .iter()
528 .map(|span| {
529 let content = InlineContent::Text(span.text.clone());
530 let fmt_font_family = if span.code {
531 Some("monospace".into())
532 } else {
533 None
534 };
535 let fmt_font_bold = if span.bold { Some(true) } else { None };
536 let fmt_font_italic = if span.italic { Some(true) } else { None };
537 let fmt_font_underline = if span.underline { Some(true) } else { None };
538 let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
539 let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
540 (Some(href.clone()), Some(true))
541 } else {
542 (None, None)
543 };
544
545 FragmentElement {
546 content,
547 fmt_font_family,
548 fmt_font_point_size: None,
549 fmt_font_weight: None,
550 fmt_font_bold,
551 fmt_font_italic,
552 fmt_font_underline,
553 fmt_font_overline: None,
554 fmt_font_strikeout,
555 fmt_letter_spacing: None,
556 fmt_word_spacing: None,
557 fmt_anchor_href,
558 fmt_anchor_names: vec![],
559 fmt_is_anchor,
560 fmt_tooltip: None,
561 fmt_underline_style: None,
562 fmt_vertical_alignment: None,
563 }
564 })
565 .collect();
566
567 let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
568
569 let list = pb.list_style.map(|style| FragmentList {
570 style,
571 indent: pb.list_indent as i64,
572 prefix: String::new(),
573 suffix: String::new(),
574 });
575
576 FragmentBlock {
577 plain_text,
578 elements,
579 heading_level: pb.heading_level,
580 list,
581 alignment: None,
582 indent: None,
583 text_indent: None,
584 marker: None,
585 top_margin: None,
586 bottom_margin: None,
587 left_margin: None,
588 right_margin: None,
589 tab_positions: vec![],
590 line_height: pb.line_height,
591 non_breakable_lines: pb.non_breakable_lines,
592 direction: pb.direction,
593 background_color: pb.background_color,
594 is_code_block: None,
595 code_language: None,
596 }
597 })
598 .collect();
599
600 let data = serde_json::to_string(&FragmentData {
601 blocks,
602 tables: vec![],
603 })
604 .expect("fragment serialization should not fail");
605
606 let plain_text = parsed_plain_text_from_data(&data);
607
608 DocumentFragment { data, plain_text }
609}
610
611fn parsed_plain_text_from_data(data: &str) -> String {
613 let fragment_data: FragmentData = match serde_json::from_str(data) {
614 Ok(d) => d,
615 Err(_) => return String::new(),
616 };
617
618 fragment_data
619 .blocks
620 .iter()
621 .map(|b| b.plain_text.as_str())
622 .collect::<Vec<_>>()
623 .join("\n")
624}