1use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::fragment_schema::{
5 FragmentBlock, FragmentData, FragmentElement,
6};
7
/// A piece of document content stored in serialized form together with its
/// plain-text rendering.
///
/// The constructors in this file fill `data` with the `serde_json` encoding
/// of a `FragmentData`; the `to_html` / `to_markdown` exporters parse it back.
#[derive(Debug, Clone)]
pub struct DocumentFragment {
    // Serialized fragment payload. Empty for `new()`; otherwise JSON produced
    // from `FragmentData` by the constructors here, or whatever string was
    // handed to `from_raw`.
    data: String,
    // Plain-text form of the same content; `is_empty` is decided from this
    // field alone.
    plain_text: String,
}
18
19impl DocumentFragment {
20 pub fn new() -> Self {
22 Self {
23 data: String::new(),
24 plain_text: String::new(),
25 }
26 }
27
28 pub fn from_plain_text(text: &str) -> Self {
33 let blocks: Vec<FragmentBlock> = text
34 .split('\n')
35 .map(|line| FragmentBlock {
36 plain_text: line.to_string(),
37 elements: vec![FragmentElement {
38 content: InlineContent::Text(line.to_string()),
39 fmt_font_family: None,
40 fmt_font_point_size: None,
41 fmt_font_weight: None,
42 fmt_font_bold: None,
43 fmt_font_italic: None,
44 fmt_font_underline: None,
45 fmt_font_overline: None,
46 fmt_font_strikeout: None,
47 fmt_letter_spacing: None,
48 fmt_word_spacing: None,
49 fmt_anchor_href: None,
50 fmt_anchor_names: vec![],
51 fmt_is_anchor: None,
52 fmt_tooltip: None,
53 fmt_underline_style: None,
54 fmt_vertical_alignment: None,
55 }],
56 heading_level: None,
57 list: None,
58 alignment: None,
59 indent: None,
60 text_indent: None,
61 marker: None,
62 top_margin: None,
63 bottom_margin: None,
64 left_margin: None,
65 right_margin: None,
66 tab_positions: vec![],
67 line_height: None,
68 non_breakable_lines: None,
69 direction: None,
70 background_color: None,
71 })
72 .collect();
73
74 let data = serde_json::to_string(&FragmentData { blocks })
75 .expect("fragment serialization should not fail");
76
77 Self {
78 data,
79 plain_text: text.to_string(),
80 }
81 }
82
83 pub fn from_html(html: &str) -> Self {
85 let parsed = frontend::common::parser_tools::content_parser::parse_html(html);
86 parsed_blocks_to_fragment(parsed)
87 }
88
89 pub fn from_markdown(markdown: &str) -> Self {
91 let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
92 parsed_blocks_to_fragment(parsed)
93 }
94
95 pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
97 let inner = doc.inner.lock();
98 let char_count = {
99 let stats =
100 frontend::commands::document_inspection_commands::get_document_stats(&inner.ctx)?;
101 crate::convert::to_usize(stats.character_count)
102 };
103 let dto = frontend::document_inspection::ExtractFragmentDto {
104 position: 0,
105 anchor: crate::convert::to_i64(char_count),
106 };
107 let result =
108 frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
109 Ok(Self::from_raw(result.fragment_data, result.plain_text))
110 }
111
112 pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
114 Self { data, plain_text }
115 }
116
117 pub fn to_plain_text(&self) -> &str {
119 &self.plain_text
120 }
121
122 pub fn to_html(&self) -> String {
124 if self.data.is_empty() {
125 return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
126 }
127
128 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
129 Ok(d) => d,
130 Err(_) => {
131 return String::from(
132 "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
133 );
134 }
135 };
136
137 let mut body = String::new();
138 let blocks = &fragment_data.blocks;
139
140 if blocks.len() == 1 && blocks[0].is_inline_only() {
142 push_inline_html(&mut body, &blocks[0].elements);
143 return format!(
144 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
145 body
146 );
147 }
148
149 let mut i = 0;
150
151 while i < blocks.len() {
152 let block = &blocks[i];
153
154 if let Some(ref list) = block.list {
155 let is_ordered = is_ordered_list_style(&list.style);
156 let list_tag = if is_ordered { "ol" } else { "ul" };
157 body.push('<');
158 body.push_str(list_tag);
159 body.push('>');
160
161 while i < blocks.len() {
162 let b = &blocks[i];
163 match &b.list {
164 Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
165 body.push_str("<li>");
166 push_inline_html(&mut body, &b.elements);
167 body.push_str("</li>");
168 i += 1;
169 }
170 _ => break,
171 }
172 }
173
174 body.push_str("</");
175 body.push_str(list_tag);
176 body.push('>');
177 } else if let Some(level) = block.heading_level {
178 let n = level.clamp(1, 6);
179 body.push_str(&format!("<h{}>", n));
180 push_inline_html(&mut body, &block.elements);
181 body.push_str(&format!("</h{}>", n));
182 i += 1;
183 } else {
184 let style = block_style_attr(block);
186 if style.is_empty() {
187 body.push_str("<p>");
188 } else {
189 body.push_str(&format!("<p style=\"{}\">", style));
190 }
191 push_inline_html(&mut body, &block.elements);
192 body.push_str("</p>");
193 i += 1;
194 }
195 }
196
197 format!(
198 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
199 body
200 )
201 }
202
203 pub fn to_markdown(&self) -> String {
205 if self.data.is_empty() {
206 return String::new();
207 }
208
209 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
210 Ok(d) => d,
211 Err(_) => return String::new(),
212 };
213
214 let mut parts: Vec<String> = Vec::new();
215 let mut prev_was_list = false;
216 let mut list_counter: u32 = 0;
217
218 for block in &fragment_data.blocks {
219 let inline_text = render_inline_markdown(&block.elements);
220 let is_list = block.list.is_some();
221
222 let indent_prefix = match block.indent {
224 Some(n) if n > 0 => " ".repeat(n as usize),
225 _ => String::new(),
226 };
227
228 if let Some(level) = block.heading_level {
229 let n = level.clamp(1, 6) as usize;
230 let prefix = "#".repeat(n);
231 parts.push(format!("{} {}", prefix, inline_text));
232 prev_was_list = false;
233 list_counter = 0;
234 } else if let Some(ref list) = block.list {
235 let is_ordered = is_ordered_list_style(&list.style);
236 if !prev_was_list {
237 list_counter = 0;
238 }
239 if is_ordered {
240 list_counter += 1;
241 parts.push(format!(
242 "{}{}. {}",
243 indent_prefix, list_counter, inline_text
244 ));
245 } else {
246 parts.push(format!("{}- {}", indent_prefix, inline_text));
247 }
248 prev_was_list = true;
249 } else {
250 if indent_prefix.is_empty() {
252 parts.push(inline_text);
253 } else {
254 parts.push(format!("{}{}", indent_prefix, inline_text));
255 }
256 prev_was_list = false;
257 list_counter = 0;
258 }
259
260 if !is_list {
261 prev_was_list = false;
262 }
263 }
264
265 let mut result = String::new();
267 let blocks = &fragment_data.blocks;
268 for (idx, part) in parts.iter().enumerate() {
269 if idx > 0 {
270 let prev_is_list = blocks[idx - 1].list.is_some();
271 let curr_is_list = blocks[idx].list.is_some();
272 if prev_is_list && curr_is_list {
273 result.push('\n');
274 } else {
275 result.push_str("\n\n");
276 }
277 }
278 result.push_str(part);
279 }
280
281 result
282 }
283
284 pub fn is_empty(&self) -> bool {
286 self.plain_text.is_empty()
287 }
288
289 pub(crate) fn raw_data(&self) -> &str {
291 &self.data
292 }
293}
294
295impl Default for DocumentFragment {
296 fn default() -> Self {
297 Self::new()
298 }
299}
300
301fn is_ordered_list_style(style: &ListStyle) -> bool {
306 matches!(
307 style,
308 ListStyle::Decimal
309 | ListStyle::LowerAlpha
310 | ListStyle::UpperAlpha
311 | ListStyle::LowerRoman
312 | ListStyle::UpperRoman
313 )
314}
315
/// Escapes the characters that are significant in HTML text and in quoted
/// attribute values.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&#39;`; every other character is copied through unchanged.
/// Input that already contains entities is escaped again (`&amp;` becomes
/// `&amp;amp;`), so call this exactly once on raw text.
fn escape_html(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#39;"),
            _ => out.push(c),
        }
    }
    out
}
332
333fn block_style_attr(block: &FragmentBlock) -> String {
335 use crate::Alignment;
336
337 let mut parts = Vec::new();
338 if let Some(ref alignment) = block.alignment {
339 let value = match alignment {
340 Alignment::Left => "left",
341 Alignment::Right => "right",
342 Alignment::Center => "center",
343 Alignment::Justify => "justify",
344 };
345 parts.push(format!("text-align: {}", value));
346 }
347 if let Some(n) = block.indent
348 && n > 0
349 {
350 parts.push(format!("margin-left: {}em", n));
351 }
352 if let Some(px) = block.text_indent
353 && px != 0
354 {
355 parts.push(format!("text-indent: {}px", px));
356 }
357 if let Some(px) = block.top_margin {
358 parts.push(format!("margin-top: {}px", px));
359 }
360 if let Some(px) = block.bottom_margin {
361 parts.push(format!("margin-bottom: {}px", px));
362 }
363 if let Some(px) = block.left_margin {
364 parts.push(format!("margin-left: {}px", px));
365 }
366 if let Some(px) = block.right_margin {
367 parts.push(format!("margin-right: {}px", px));
368 }
369 parts.join("; ")
370}
371
372fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
373 for elem in elements {
374 let text = match &elem.content {
375 InlineContent::Text(t) => escape_html(t),
376 InlineContent::Image {
377 name,
378 width,
379 height,
380 ..
381 } => {
382 format!(
383 "<img src=\"{}\" width=\"{}\" height=\"{}\">",
384 escape_html(name),
385 width,
386 height
387 )
388 }
389 InlineContent::Empty => String::new(),
390 };
391
392 let is_monospace = elem
393 .fmt_font_family
394 .as_deref()
395 .is_some_and(|f| f == "monospace");
396 let is_bold = elem.fmt_font_bold.unwrap_or(false);
397 let is_italic = elem.fmt_font_italic.unwrap_or(false);
398 let is_underline = elem.fmt_font_underline.unwrap_or(false);
399 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
400 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
401
402 let mut result = text;
403
404 if is_monospace {
405 result = format!("<code>{}</code>", result);
406 }
407 if is_bold {
408 result = format!("<strong>{}</strong>", result);
409 }
410 if is_italic {
411 result = format!("<em>{}</em>", result);
412 }
413 if is_underline {
414 result = format!("<u>{}</u>", result);
415 }
416 if is_strikeout {
417 result = format!("<s>{}</s>", result);
418 }
419 if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
420 result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
421 }
422
423 out.push_str(&result);
424 }
425}
426
/// Backslash-escapes every character in `s` that this module treats as
/// Markdown syntax, leaving all other characters untouched.
fn escape_markdown(s: &str) -> String {
    // Characters that receive a leading backslash.
    const SPECIALS: &[char] = &[
        '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!', '|', '~',
        '<', '>',
    ];

    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            if SPECIALS.contains(&ch) {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
459
460fn render_inline_markdown(elements: &[FragmentElement]) -> String {
461 let mut out = String::new();
462 for elem in elements {
463 let raw_text = match &elem.content {
464 InlineContent::Text(t) => t.clone(),
465 InlineContent::Image { name, .. } => format!("", name, name),
466 InlineContent::Empty => String::new(),
467 };
468
469 let is_monospace = elem
470 .fmt_font_family
471 .as_deref()
472 .is_some_and(|f| f == "monospace");
473 let is_bold = elem.fmt_font_bold.unwrap_or(false);
474 let is_italic = elem.fmt_font_italic.unwrap_or(false);
475 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
476 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
477
478 if is_monospace {
479 out.push('`');
480 out.push_str(&raw_text);
481 out.push('`');
482 } else {
483 let mut text = escape_markdown(&raw_text);
484 if is_bold && is_italic {
485 text = format!("***{}***", text);
486 } else if is_bold {
487 text = format!("**{}**", text);
488 } else if is_italic {
489 text = format!("*{}*", text);
490 }
491 if is_strikeout {
492 text = format!("~~{}~~", text);
493 }
494 if is_anchor {
495 let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
496 out.push_str(&format!("[{}]({})", text, href));
497 } else {
498 out.push_str(&text);
499 }
500 }
501 }
502 out
503}
504
505fn parsed_blocks_to_fragment(
509 parsed: Vec<frontend::common::parser_tools::content_parser::ParsedBlock>,
510) -> DocumentFragment {
511 use frontend::common::parser_tools::fragment_schema::FragmentList;
512
513 let blocks: Vec<FragmentBlock> = parsed
514 .into_iter()
515 .map(|pb| {
516 let elements: Vec<FragmentElement> = pb
517 .spans
518 .iter()
519 .map(|span| {
520 let content = InlineContent::Text(span.text.clone());
521 let fmt_font_family = if span.code {
522 Some("monospace".into())
523 } else {
524 None
525 };
526 let fmt_font_bold = if span.bold { Some(true) } else { None };
527 let fmt_font_italic = if span.italic { Some(true) } else { None };
528 let fmt_font_underline = if span.underline { Some(true) } else { None };
529 let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
530 let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
531 (Some(href.clone()), Some(true))
532 } else {
533 (None, None)
534 };
535
536 FragmentElement {
537 content,
538 fmt_font_family,
539 fmt_font_point_size: None,
540 fmt_font_weight: None,
541 fmt_font_bold,
542 fmt_font_italic,
543 fmt_font_underline,
544 fmt_font_overline: None,
545 fmt_font_strikeout,
546 fmt_letter_spacing: None,
547 fmt_word_spacing: None,
548 fmt_anchor_href,
549 fmt_anchor_names: vec![],
550 fmt_is_anchor,
551 fmt_tooltip: None,
552 fmt_underline_style: None,
553 fmt_vertical_alignment: None,
554 }
555 })
556 .collect();
557
558 let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
559
560 let list = pb.list_style.map(|style| FragmentList {
561 style,
562 indent: 0,
563 prefix: String::new(),
564 suffix: String::new(),
565 });
566
567 FragmentBlock {
568 plain_text,
569 elements,
570 heading_level: pb.heading_level,
571 list,
572 alignment: None,
573 indent: None,
574 text_indent: None,
575 marker: None,
576 top_margin: None,
577 bottom_margin: None,
578 left_margin: None,
579 right_margin: None,
580 tab_positions: vec![],
581 line_height: pb.line_height,
582 non_breakable_lines: pb.non_breakable_lines,
583 direction: pb.direction,
584 background_color: pb.background_color,
585 }
586 })
587 .collect();
588
589 let data = serde_json::to_string(&FragmentData { blocks })
590 .expect("fragment serialization should not fail");
591
592 let plain_text = parsed_plain_text_from_data(&data);
593
594 DocumentFragment { data, plain_text }
595}
596
597fn parsed_plain_text_from_data(data: &str) -> String {
599 let fragment_data: FragmentData = match serde_json::from_str(data) {
600 Ok(d) => d,
601 Err(_) => return String::new(),
602 };
603
604 fragment_data
605 .blocks
606 .iter()
607 .map(|b| b.plain_text.as_str())
608 .collect::<Vec<_>>()
609 .join("\n")
610}