1use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::fragment_schema::{
5 FragmentBlock, FragmentData, FragmentElement,
6};
7
/// A piece of document content kept in two parallel representations.
///
/// Constructors in this file fill `data` with a JSON-serialized `FragmentData`
/// (or leave it empty for [`DocumentFragment::new`]) and `plain_text` with the
/// matching unformatted text.
#[derive(Clone, Debug)]
pub struct DocumentFragment {
    /// Serialized fragment payload; an empty string means "no content".
    data: String,
    /// Unformatted text of the fragment.
    plain_text: String,
}
18
19impl DocumentFragment {
20 pub fn new() -> Self {
22 Self {
23 data: String::new(),
24 plain_text: String::new(),
25 }
26 }
27
28 pub fn from_plain_text(text: &str) -> Self {
33 let blocks: Vec<FragmentBlock> = text
34 .split('\n')
35 .map(|line| FragmentBlock {
36 plain_text: line.to_string(),
37 elements: vec![FragmentElement {
38 content: InlineContent::Text(line.to_string()),
39 fmt_font_family: None,
40 fmt_font_point_size: None,
41 fmt_font_weight: None,
42 fmt_font_bold: None,
43 fmt_font_italic: None,
44 fmt_font_underline: None,
45 fmt_font_overline: None,
46 fmt_font_strikeout: None,
47 fmt_letter_spacing: None,
48 fmt_word_spacing: None,
49 fmt_anchor_href: None,
50 fmt_anchor_names: vec![],
51 fmt_is_anchor: None,
52 fmt_tooltip: None,
53 fmt_underline_style: None,
54 fmt_vertical_alignment: None,
55 }],
56 heading_level: None,
57 list: None,
58 alignment: None,
59 indent: None,
60 text_indent: None,
61 marker: None,
62 top_margin: None,
63 bottom_margin: None,
64 left_margin: None,
65 right_margin: None,
66 tab_positions: vec![],
67 line_height: None,
68 non_breakable_lines: None,
69 direction: None,
70 background_color: None,
71 })
72 .collect();
73
74 let data = serde_json::to_string(&FragmentData { blocks })
75 .expect("fragment serialization should not fail");
76
77 Self {
78 data,
79 plain_text: text.to_string(),
80 }
81 }
82
83 pub fn from_html(html: &str) -> Self {
85 let parsed = frontend::common::parser_tools::content_parser::parse_html(html);
86 parsed_blocks_to_fragment(parsed)
87 }
88
89 pub fn from_markdown(markdown: &str) -> Self {
91 let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
92 parsed_blocks_to_fragment(parsed)
93 }
94
95 pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
97 let inner = doc.inner.lock();
98 let char_count = {
99 let stats =
100 frontend::commands::document_inspection_commands::get_document_stats(&inner.ctx)?;
101 crate::convert::to_usize(stats.character_count)
102 };
103 let dto = frontend::document_inspection::ExtractFragmentDto {
104 position: 0,
105 anchor: crate::convert::to_i64(char_count),
106 };
107 let result =
108 frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
109 Ok(Self::from_raw(result.fragment_data, result.plain_text))
110 }
111
112 pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
114 Self { data, plain_text }
115 }
116
117 pub fn to_plain_text(&self) -> &str {
119 &self.plain_text
120 }
121
122 pub fn to_html(&self) -> String {
124 if self.data.is_empty() {
125 return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
126 }
127
128 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
129 Ok(d) => d,
130 Err(_) => {
131 return String::from(
132 "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
133 );
134 }
135 };
136
137 let mut body = String::new();
138 let blocks = &fragment_data.blocks;
139 let mut i = 0;
140
141 while i < blocks.len() {
142 let block = &blocks[i];
143
144 if let Some(ref list) = block.list {
145 let is_ordered = is_ordered_list_style(&list.style);
146 let list_tag = if is_ordered { "ol" } else { "ul" };
147 body.push('<');
148 body.push_str(list_tag);
149 body.push('>');
150
151 while i < blocks.len() {
152 let b = &blocks[i];
153 match &b.list {
154 Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
155 body.push_str("<li>");
156 push_inline_html(&mut body, &b.elements);
157 body.push_str("</li>");
158 i += 1;
159 }
160 _ => break,
161 }
162 }
163
164 body.push_str("</");
165 body.push_str(list_tag);
166 body.push('>');
167 } else if let Some(level) = block.heading_level {
168 let n = level.clamp(1, 6);
169 body.push_str(&format!("<h{}>", n));
170 push_inline_html(&mut body, &block.elements);
171 body.push_str(&format!("</h{}>", n));
172 i += 1;
173 } else {
174 let style = block_style_attr(block);
176 if style.is_empty() {
177 body.push_str("<p>");
178 } else {
179 body.push_str(&format!("<p style=\"{}\">", style));
180 }
181 push_inline_html(&mut body, &block.elements);
182 body.push_str("</p>");
183 i += 1;
184 }
185 }
186
187 format!(
188 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
189 body
190 )
191 }
192
193 pub fn to_markdown(&self) -> String {
195 if self.data.is_empty() {
196 return String::new();
197 }
198
199 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
200 Ok(d) => d,
201 Err(_) => return String::new(),
202 };
203
204 let mut parts: Vec<String> = Vec::new();
205 let mut prev_was_list = false;
206 let mut list_counter: u32 = 0;
207
208 for block in &fragment_data.blocks {
209 let inline_text = render_inline_markdown(&block.elements);
210 let is_list = block.list.is_some();
211
212 let indent_prefix = match block.indent {
214 Some(n) if n > 0 => " ".repeat(n as usize),
215 _ => String::new(),
216 };
217
218 if let Some(level) = block.heading_level {
219 let n = level.clamp(1, 6) as usize;
220 let prefix = "#".repeat(n);
221 parts.push(format!("{} {}", prefix, inline_text));
222 prev_was_list = false;
223 list_counter = 0;
224 } else if let Some(ref list) = block.list {
225 let is_ordered = is_ordered_list_style(&list.style);
226 if !prev_was_list {
227 list_counter = 0;
228 }
229 if is_ordered {
230 list_counter += 1;
231 parts.push(format!(
232 "{}{}. {}",
233 indent_prefix, list_counter, inline_text
234 ));
235 } else {
236 parts.push(format!("{}- {}", indent_prefix, inline_text));
237 }
238 prev_was_list = true;
239 } else {
240 if indent_prefix.is_empty() {
242 parts.push(inline_text);
243 } else {
244 parts.push(format!("{}{}", indent_prefix, inline_text));
245 }
246 prev_was_list = false;
247 list_counter = 0;
248 }
249
250 if !is_list {
251 prev_was_list = false;
252 }
253 }
254
255 let mut result = String::new();
257 let blocks = &fragment_data.blocks;
258 for (idx, part) in parts.iter().enumerate() {
259 if idx > 0 {
260 let prev_is_list = blocks[idx - 1].list.is_some();
261 let curr_is_list = blocks[idx].list.is_some();
262 if prev_is_list && curr_is_list {
263 result.push('\n');
264 } else {
265 result.push_str("\n\n");
266 }
267 }
268 result.push_str(part);
269 }
270
271 result
272 }
273
274 pub fn is_empty(&self) -> bool {
276 self.plain_text.is_empty()
277 }
278
279 pub(crate) fn raw_data(&self) -> &str {
281 &self.data
282 }
283}
284
285impl Default for DocumentFragment {
286 fn default() -> Self {
287 Self::new()
288 }
289}
290
291fn is_ordered_list_style(style: &ListStyle) -> bool {
296 matches!(
297 style,
298 ListStyle::Decimal
299 | ListStyle::LowerAlpha
300 | ListStyle::UpperAlpha
301 | ListStyle::LowerRoman
302 | ListStyle::UpperRoman
303 )
304}
305
/// Escapes the characters that are significant in HTML text and in
/// double- or single-quoted attribute values.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&#39;` respectively; every other character is copied
/// unchanged. `&` is handled per character, so already-escaped input is
/// escaped again (`&amp;` becomes `&amp;amp;`).
fn escape_html(s: &str) -> String {
    // Lower bound only: each escaped character expands the output.
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#39;"),
            _ => out.push(c),
        }
    }
    out
}
322
323fn block_style_attr(block: &FragmentBlock) -> String {
325 use crate::Alignment;
326
327 let mut parts = Vec::new();
328 if let Some(ref alignment) = block.alignment {
329 let value = match alignment {
330 Alignment::Left => "left",
331 Alignment::Right => "right",
332 Alignment::Center => "center",
333 Alignment::Justify => "justify",
334 };
335 parts.push(format!("text-align: {}", value));
336 }
337 if let Some(n) = block.indent
338 && n > 0
339 {
340 parts.push(format!("margin-left: {}em", n));
341 }
342 if let Some(px) = block.text_indent
343 && px != 0
344 {
345 parts.push(format!("text-indent: {}px", px));
346 }
347 if let Some(px) = block.top_margin {
348 parts.push(format!("margin-top: {}px", px));
349 }
350 if let Some(px) = block.bottom_margin {
351 parts.push(format!("margin-bottom: {}px", px));
352 }
353 if let Some(px) = block.left_margin {
354 parts.push(format!("margin-left: {}px", px));
355 }
356 if let Some(px) = block.right_margin {
357 parts.push(format!("margin-right: {}px", px));
358 }
359 parts.join("; ")
360}
361
362fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
363 for elem in elements {
364 let text = match &elem.content {
365 InlineContent::Text(t) => escape_html(t),
366 InlineContent::Image {
367 name,
368 width,
369 height,
370 ..
371 } => {
372 format!(
373 "<img src=\"{}\" width=\"{}\" height=\"{}\">",
374 escape_html(name),
375 width,
376 height
377 )
378 }
379 InlineContent::Empty => String::new(),
380 };
381
382 let is_monospace = elem
383 .fmt_font_family
384 .as_deref()
385 .is_some_and(|f| f == "monospace");
386 let is_bold = elem.fmt_font_bold.unwrap_or(false);
387 let is_italic = elem.fmt_font_italic.unwrap_or(false);
388 let is_underline = elem.fmt_font_underline.unwrap_or(false);
389 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
390 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
391
392 let mut result = text;
393
394 if is_monospace {
395 result = format!("<code>{}</code>", result);
396 }
397 if is_bold {
398 result = format!("<strong>{}</strong>", result);
399 }
400 if is_italic {
401 result = format!("<em>{}</em>", result);
402 }
403 if is_underline {
404 result = format!("<u>{}</u>", result);
405 }
406 if is_strikeout {
407 result = format!("<s>{}</s>", result);
408 }
409 if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
410 result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
411 }
412
413 out.push_str(&result);
414 }
415}
416
/// Backslash-escapes every character in `s` that belongs to the fixed set of
/// Markdown-significant characters listed in `SPECIAL`; all other characters
/// are copied unchanged.
fn escape_markdown(s: &str) -> String {
    const SPECIAL: &[char] = &[
        '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!', '|', '~', '<',
        '>',
    ];

    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            if SPECIAL.contains(&ch) {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
449
450fn render_inline_markdown(elements: &[FragmentElement]) -> String {
451 let mut out = String::new();
452 for elem in elements {
453 let raw_text = match &elem.content {
454 InlineContent::Text(t) => t.clone(),
455 InlineContent::Image { name, .. } => format!("", name, name),
456 InlineContent::Empty => String::new(),
457 };
458
459 let is_monospace = elem
460 .fmt_font_family
461 .as_deref()
462 .is_some_and(|f| f == "monospace");
463 let is_bold = elem.fmt_font_bold.unwrap_or(false);
464 let is_italic = elem.fmt_font_italic.unwrap_or(false);
465 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
466 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
467
468 if is_monospace {
469 out.push('`');
470 out.push_str(&raw_text);
471 out.push('`');
472 } else {
473 let mut text = escape_markdown(&raw_text);
474 if is_bold && is_italic {
475 text = format!("***{}***", text);
476 } else if is_bold {
477 text = format!("**{}**", text);
478 } else if is_italic {
479 text = format!("*{}*", text);
480 }
481 if is_strikeout {
482 text = format!("~~{}~~", text);
483 }
484 if is_anchor {
485 let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
486 out.push_str(&format!("[{}]({})", text, href));
487 } else {
488 out.push_str(&text);
489 }
490 }
491 }
492 out
493}
494
495fn parsed_blocks_to_fragment(
499 parsed: Vec<frontend::common::parser_tools::content_parser::ParsedBlock>,
500) -> DocumentFragment {
501 use frontend::common::parser_tools::fragment_schema::FragmentList;
502
503 let blocks: Vec<FragmentBlock> = parsed
504 .into_iter()
505 .map(|pb| {
506 let elements: Vec<FragmentElement> = pb
507 .spans
508 .iter()
509 .map(|span| {
510 let content = InlineContent::Text(span.text.clone());
511 let fmt_font_family = if span.code {
512 Some("monospace".into())
513 } else {
514 None
515 };
516 let fmt_font_bold = if span.bold { Some(true) } else { None };
517 let fmt_font_italic = if span.italic { Some(true) } else { None };
518 let fmt_font_underline = if span.underline { Some(true) } else { None };
519 let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
520 let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
521 (Some(href.clone()), Some(true))
522 } else {
523 (None, None)
524 };
525
526 FragmentElement {
527 content,
528 fmt_font_family,
529 fmt_font_point_size: None,
530 fmt_font_weight: None,
531 fmt_font_bold,
532 fmt_font_italic,
533 fmt_font_underline,
534 fmt_font_overline: None,
535 fmt_font_strikeout,
536 fmt_letter_spacing: None,
537 fmt_word_spacing: None,
538 fmt_anchor_href,
539 fmt_anchor_names: vec![],
540 fmt_is_anchor,
541 fmt_tooltip: None,
542 fmt_underline_style: None,
543 fmt_vertical_alignment: None,
544 }
545 })
546 .collect();
547
548 let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
549
550 let list = pb.list_style.map(|style| FragmentList {
551 style,
552 indent: 0,
553 prefix: String::new(),
554 suffix: String::new(),
555 });
556
557 FragmentBlock {
558 plain_text,
559 elements,
560 heading_level: pb.heading_level,
561 list,
562 alignment: None,
563 indent: None,
564 text_indent: None,
565 marker: None,
566 top_margin: None,
567 bottom_margin: None,
568 left_margin: None,
569 right_margin: None,
570 tab_positions: vec![],
571 line_height: pb.line_height,
572 non_breakable_lines: pb.non_breakable_lines,
573 direction: pb.direction,
574 background_color: pb.background_color,
575 }
576 })
577 .collect();
578
579 let data = serde_json::to_string(&FragmentData { blocks })
580 .expect("fragment serialization should not fail");
581
582 let plain_text = parsed_plain_text_from_data(&data);
583
584 DocumentFragment { data, plain_text }
585}
586
587fn parsed_plain_text_from_data(data: &str) -> String {
589 let fragment_data: FragmentData = match serde_json::from_str(data) {
590 Ok(d) => d,
591 Err(_) => return String::new(),
592 };
593
594 fragment_data
595 .blocks
596 .iter()
597 .map(|b| b.plain_text.as_str())
598 .collect::<Vec<_>>()
599 .join("\n")
600}