1use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::fragment_schema::{
5 FragmentBlock, FragmentData, FragmentElement,
6};
7
/// A piece of document content kept in two parallel forms: serialized block
/// data and a plain-text rendering.
#[derive(Debug, Clone)]
pub struct DocumentFragment {
 /// Serialized block data. The constructors in this file produce it by
 /// JSON-serializing a `FragmentData`; it is the empty string for a fragment
 /// created with `new`.
 data: String,
 /// Plain-text rendering of the fragment's content.
 plain_text: String,
}
18
19impl DocumentFragment {
20 pub fn new() -> Self {
22 Self {
23 data: String::new(),
24 plain_text: String::new(),
25 }
26 }
27
28 pub fn from_plain_text(text: &str) -> Self {
33 let blocks: Vec<FragmentBlock> = text
34 .split('\n')
35 .map(|line| FragmentBlock {
36 plain_text: line.to_string(),
37 elements: vec![FragmentElement {
38 content: InlineContent::Text(line.to_string()),
39 fmt_font_family: None,
40 fmt_font_point_size: None,
41 fmt_font_weight: None,
42 fmt_font_bold: None,
43 fmt_font_italic: None,
44 fmt_font_underline: None,
45 fmt_font_overline: None,
46 fmt_font_strikeout: None,
47 fmt_letter_spacing: None,
48 fmt_word_spacing: None,
49 fmt_anchor_href: None,
50 fmt_anchor_names: vec![],
51 fmt_is_anchor: None,
52 fmt_tooltip: None,
53 fmt_underline_style: None,
54 fmt_vertical_alignment: None,
55 }],
56 heading_level: None,
57 list: None,
58 alignment: None,
59 indent: None,
60 text_indent: None,
61 marker: None,
62 top_margin: None,
63 bottom_margin: None,
64 left_margin: None,
65 right_margin: None,
66 tab_positions: vec![],
67 line_height: None,
68 non_breakable_lines: None,
69 direction: None,
70 background_color: None,
71 is_code_block: None,
72 code_language: None,
73 })
74 .collect();
75
76 let data = serde_json::to_string(&FragmentData { blocks })
77 .expect("fragment serialization should not fail");
78
79 Self {
80 data,
81 plain_text: text.to_string(),
82 }
83 }
84
85 pub fn from_html(html: &str) -> Self {
87 let parsed = frontend::common::parser_tools::content_parser::parse_html(html);
88 parsed_blocks_to_fragment(parsed)
89 }
90
91 pub fn from_markdown(markdown: &str) -> Self {
93 let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
94 parsed_blocks_to_fragment(parsed)
95 }
96
97 pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
99 let inner = doc.inner.lock();
100 let char_count = {
101 let stats =
102 frontend::commands::document_inspection_commands::get_document_stats(&inner.ctx)?;
103 crate::convert::to_usize(stats.character_count)
104 };
105 let dto = frontend::document_inspection::ExtractFragmentDto {
106 position: 0,
107 anchor: crate::convert::to_i64(char_count),
108 };
109 let result =
110 frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
111 Ok(Self::from_raw(result.fragment_data, result.plain_text))
112 }
113
114 pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
116 Self { data, plain_text }
117 }
118
119 pub fn to_plain_text(&self) -> &str {
121 &self.plain_text
122 }
123
124 pub fn to_html(&self) -> String {
126 if self.data.is_empty() {
127 return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
128 }
129
130 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
131 Ok(d) => d,
132 Err(_) => {
133 return String::from(
134 "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
135 );
136 }
137 };
138
139 let mut body = String::new();
140 let blocks = &fragment_data.blocks;
141
142 if blocks.len() == 1 && blocks[0].is_inline_only() {
144 push_inline_html(&mut body, &blocks[0].elements);
145 return format!(
146 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
147 body
148 );
149 }
150
151 let mut i = 0;
152
153 while i < blocks.len() {
154 let block = &blocks[i];
155
156 if let Some(ref list) = block.list {
157 let is_ordered = is_ordered_list_style(&list.style);
158 let list_tag = if is_ordered { "ol" } else { "ul" };
159 body.push('<');
160 body.push_str(list_tag);
161 body.push('>');
162
163 while i < blocks.len() {
164 let b = &blocks[i];
165 match &b.list {
166 Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
167 body.push_str("<li>");
168 push_inline_html(&mut body, &b.elements);
169 body.push_str("</li>");
170 i += 1;
171 }
172 _ => break,
173 }
174 }
175
176 body.push_str("</");
177 body.push_str(list_tag);
178 body.push('>');
179 } else if let Some(level) = block.heading_level {
180 let n = level.clamp(1, 6);
181 body.push_str(&format!("<h{}>", n));
182 push_inline_html(&mut body, &block.elements);
183 body.push_str(&format!("</h{}>", n));
184 i += 1;
185 } else {
186 let style = block_style_attr(block);
188 if style.is_empty() {
189 body.push_str("<p>");
190 } else {
191 body.push_str(&format!("<p style=\"{}\">", style));
192 }
193 push_inline_html(&mut body, &block.elements);
194 body.push_str("</p>");
195 i += 1;
196 }
197 }
198
199 format!(
200 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
201 body
202 )
203 }
204
205 pub fn to_markdown(&self) -> String {
207 if self.data.is_empty() {
208 return String::new();
209 }
210
211 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
212 Ok(d) => d,
213 Err(_) => return String::new(),
214 };
215
216 let mut parts: Vec<String> = Vec::new();
217 let mut prev_was_list = false;
218 let mut list_counter: u32 = 0;
219
220 for block in &fragment_data.blocks {
221 let inline_text = render_inline_markdown(&block.elements);
222 let is_list = block.list.is_some();
223
224 let indent_prefix = match block.indent {
226 Some(n) if n > 0 => " ".repeat(n as usize),
227 _ => String::new(),
228 };
229
230 if let Some(level) = block.heading_level {
231 let n = level.clamp(1, 6) as usize;
232 let prefix = "#".repeat(n);
233 parts.push(format!("{} {}", prefix, inline_text));
234 prev_was_list = false;
235 list_counter = 0;
236 } else if let Some(ref list) = block.list {
237 let is_ordered = is_ordered_list_style(&list.style);
238 if !prev_was_list {
239 list_counter = 0;
240 }
241 if is_ordered {
242 list_counter += 1;
243 parts.push(format!(
244 "{}{}. {}",
245 indent_prefix, list_counter, inline_text
246 ));
247 } else {
248 parts.push(format!("{}- {}", indent_prefix, inline_text));
249 }
250 prev_was_list = true;
251 } else {
252 if indent_prefix.is_empty() {
254 parts.push(inline_text);
255 } else {
256 parts.push(format!("{}{}", indent_prefix, inline_text));
257 }
258 prev_was_list = false;
259 list_counter = 0;
260 }
261
262 if !is_list {
263 prev_was_list = false;
264 }
265 }
266
267 let mut result = String::new();
269 let blocks = &fragment_data.blocks;
270 for (idx, part) in parts.iter().enumerate() {
271 if idx > 0 {
272 let prev_is_list = blocks[idx - 1].list.is_some();
273 let curr_is_list = blocks[idx].list.is_some();
274 if prev_is_list && curr_is_list {
275 result.push('\n');
276 } else {
277 result.push_str("\n\n");
278 }
279 }
280 result.push_str(part);
281 }
282
283 result
284 }
285
286 pub fn is_empty(&self) -> bool {
288 self.plain_text.is_empty()
289 }
290
291 pub(crate) fn raw_data(&self) -> &str {
293 &self.data
294 }
295}
296
297impl Default for DocumentFragment {
298 fn default() -> Self {
299 Self::new()
300 }
301}
302
303fn is_ordered_list_style(style: &ListStyle) -> bool {
308 matches!(
309 style,
310 ListStyle::Decimal
311 | ListStyle::LowerAlpha
312 | ListStyle::UpperAlpha
313 | ListStyle::LowerRoman
314 | ListStyle::UpperRoman
315 )
316}
317
/// Escapes text for safe inclusion in HTML element content and in
/// double- or single-quoted attribute values.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by their character references;
/// every other character is copied through unchanged.
fn escape_html(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            // Numeric reference: understood by every HTML version.
            '\'' => out.push_str("&#39;"),
            _ => out.push(c),
        }
    }
    out
}
334
335fn block_style_attr(block: &FragmentBlock) -> String {
337 use crate::Alignment;
338
339 let mut parts = Vec::new();
340 if let Some(ref alignment) = block.alignment {
341 let value = match alignment {
342 Alignment::Left => "left",
343 Alignment::Right => "right",
344 Alignment::Center => "center",
345 Alignment::Justify => "justify",
346 };
347 parts.push(format!("text-align: {}", value));
348 }
349 if let Some(n) = block.indent
350 && n > 0
351 {
352 parts.push(format!("margin-left: {}em", n));
353 }
354 if let Some(px) = block.text_indent
355 && px != 0
356 {
357 parts.push(format!("text-indent: {}px", px));
358 }
359 if let Some(px) = block.top_margin {
360 parts.push(format!("margin-top: {}px", px));
361 }
362 if let Some(px) = block.bottom_margin {
363 parts.push(format!("margin-bottom: {}px", px));
364 }
365 if let Some(px) = block.left_margin {
366 parts.push(format!("margin-left: {}px", px));
367 }
368 if let Some(px) = block.right_margin {
369 parts.push(format!("margin-right: {}px", px));
370 }
371 parts.join("; ")
372}
373
374fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
375 for elem in elements {
376 let text = match &elem.content {
377 InlineContent::Text(t) => escape_html(t),
378 InlineContent::Image {
379 name,
380 width,
381 height,
382 ..
383 } => {
384 format!(
385 "<img src=\"{}\" width=\"{}\" height=\"{}\">",
386 escape_html(name),
387 width,
388 height
389 )
390 }
391 InlineContent::Empty => String::new(),
392 };
393
394 let is_monospace = elem
395 .fmt_font_family
396 .as_deref()
397 .is_some_and(|f| f == "monospace");
398 let is_bold = elem.fmt_font_bold.unwrap_or(false);
399 let is_italic = elem.fmt_font_italic.unwrap_or(false);
400 let is_underline = elem.fmt_font_underline.unwrap_or(false);
401 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
402 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
403
404 let mut result = text;
405
406 if is_monospace {
407 result = format!("<code>{}</code>", result);
408 }
409 if is_bold {
410 result = format!("<strong>{}</strong>", result);
411 }
412 if is_italic {
413 result = format!("<em>{}</em>", result);
414 }
415 if is_underline {
416 result = format!("<u>{}</u>", result);
417 }
418 if is_strikeout {
419 result = format!("<s>{}</s>", result);
420 }
421 if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
422 result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
423 }
424
425 out.push_str(&result);
426 }
427}
428
/// Backslash-escapes every character of `s` that can act as Markdown syntax.
///
/// The escaped set is: `\`, `` ` ``, `*`, `_`, `{`, `}`, `[`, `]`, `(`, `)`,
/// `#`, `+`, `-`, `.`, `!`, `|`, `~`, `<` and `>`. All other characters are
/// copied through unchanged.
fn escape_markdown(s: &str) -> String {
    const SPECIAL: &[char] = &[
        '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!', '|', '~',
        '<', '>',
    ];

    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if SPECIAL.contains(&ch) {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
461
462fn render_inline_markdown(elements: &[FragmentElement]) -> String {
463 let mut out = String::new();
464 for elem in elements {
465 let raw_text = match &elem.content {
466 InlineContent::Text(t) => t.clone(),
467 InlineContent::Image { name, .. } => format!("", name, name),
468 InlineContent::Empty => String::new(),
469 };
470
471 let is_monospace = elem
472 .fmt_font_family
473 .as_deref()
474 .is_some_and(|f| f == "monospace");
475 let is_bold = elem.fmt_font_bold.unwrap_or(false);
476 let is_italic = elem.fmt_font_italic.unwrap_or(false);
477 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
478 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
479
480 if is_monospace {
481 out.push('`');
482 out.push_str(&raw_text);
483 out.push('`');
484 } else {
485 let mut text = escape_markdown(&raw_text);
486 if is_bold && is_italic {
487 text = format!("***{}***", text);
488 } else if is_bold {
489 text = format!("**{}**", text);
490 } else if is_italic {
491 text = format!("*{}*", text);
492 }
493 if is_strikeout {
494 text = format!("~~{}~~", text);
495 }
496 if is_anchor {
497 let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
498 out.push_str(&format!("[{}]({})", text, href));
499 } else {
500 out.push_str(&text);
501 }
502 }
503 }
504 out
505}
506
507fn parsed_blocks_to_fragment(
511 parsed: Vec<frontend::common::parser_tools::content_parser::ParsedBlock>,
512) -> DocumentFragment {
513 use frontend::common::parser_tools::fragment_schema::FragmentList;
514
515 let blocks: Vec<FragmentBlock> = parsed
516 .into_iter()
517 .map(|pb| {
518 let elements: Vec<FragmentElement> = pb
519 .spans
520 .iter()
521 .map(|span| {
522 let content = InlineContent::Text(span.text.clone());
523 let fmt_font_family = if span.code {
524 Some("monospace".into())
525 } else {
526 None
527 };
528 let fmt_font_bold = if span.bold { Some(true) } else { None };
529 let fmt_font_italic = if span.italic { Some(true) } else { None };
530 let fmt_font_underline = if span.underline { Some(true) } else { None };
531 let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
532 let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
533 (Some(href.clone()), Some(true))
534 } else {
535 (None, None)
536 };
537
538 FragmentElement {
539 content,
540 fmt_font_family,
541 fmt_font_point_size: None,
542 fmt_font_weight: None,
543 fmt_font_bold,
544 fmt_font_italic,
545 fmt_font_underline,
546 fmt_font_overline: None,
547 fmt_font_strikeout,
548 fmt_letter_spacing: None,
549 fmt_word_spacing: None,
550 fmt_anchor_href,
551 fmt_anchor_names: vec![],
552 fmt_is_anchor,
553 fmt_tooltip: None,
554 fmt_underline_style: None,
555 fmt_vertical_alignment: None,
556 }
557 })
558 .collect();
559
560 let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
561
562 let list = pb.list_style.map(|style| FragmentList {
563 style,
564 indent: 0,
565 prefix: String::new(),
566 suffix: String::new(),
567 });
568
569 FragmentBlock {
570 plain_text,
571 elements,
572 heading_level: pb.heading_level,
573 list,
574 alignment: None,
575 indent: None,
576 text_indent: None,
577 marker: None,
578 top_margin: None,
579 bottom_margin: None,
580 left_margin: None,
581 right_margin: None,
582 tab_positions: vec![],
583 line_height: pb.line_height,
584 non_breakable_lines: pb.non_breakable_lines,
585 direction: pb.direction,
586 background_color: pb.background_color,
587 is_code_block: None,
588 code_language: None,
589 }
590 })
591 .collect();
592
593 let data = serde_json::to_string(&FragmentData { blocks })
594 .expect("fragment serialization should not fail");
595
596 let plain_text = parsed_plain_text_from_data(&data);
597
598 DocumentFragment { data, plain_text }
599}
600
601fn parsed_plain_text_from_data(data: &str) -> String {
603 let fragment_data: FragmentData = match serde_json::from_str(data) {
604 Ok(d) => d,
605 Err(_) => return String::new(),
606 };
607
608 fragment_data
609 .blocks
610 .iter()
611 .map(|b| b.plain_text.as_str())
612 .collect::<Vec<_>>()
613 .join("\n")
614}