1use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::fragment_schema::{
5 FragmentBlock, FragmentData, FragmentElement,
6};
7
/// A self-contained piece of rich-text content.
///
/// The same content is kept in two forms: a JSON document describing the
/// formatted blocks, and the unformatted plain text.
#[derive(Debug, Clone)]
pub struct DocumentFragment {
    /// JSON text that the constructors in this file produce by serializing a
    /// `FragmentData`; it is the empty string for a fragment built by `new`.
    data: String,
    /// Unformatted text of the fragment.
    plain_text: String,
}
18
19impl DocumentFragment {
20 pub fn new() -> Self {
22 Self {
23 data: String::new(),
24 plain_text: String::new(),
25 }
26 }
27
28 pub fn from_plain_text(text: &str) -> Self {
33 let blocks: Vec<FragmentBlock> = text
34 .split('\n')
35 .map(|line| FragmentBlock {
36 plain_text: line.to_string(),
37 elements: vec![FragmentElement {
38 content: InlineContent::Text(line.to_string()),
39 fmt_font_family: None,
40 fmt_font_point_size: None,
41 fmt_font_weight: None,
42 fmt_font_bold: None,
43 fmt_font_italic: None,
44 fmt_font_underline: None,
45 fmt_font_overline: None,
46 fmt_font_strikeout: None,
47 fmt_letter_spacing: None,
48 fmt_word_spacing: None,
49 fmt_anchor_href: None,
50 fmt_anchor_names: vec![],
51 fmt_is_anchor: None,
52 fmt_tooltip: None,
53 fmt_underline_style: None,
54 fmt_vertical_alignment: None,
55 }],
56 heading_level: None,
57 list: None,
58 alignment: None,
59 indent: None,
60 text_indent: None,
61 marker: None,
62 top_margin: None,
63 bottom_margin: None,
64 left_margin: None,
65 right_margin: None,
66 tab_positions: vec![],
67 })
68 .collect();
69
70 let data = serde_json::to_string(&FragmentData { blocks })
71 .expect("fragment serialization should not fail");
72
73 Self {
74 data,
75 plain_text: text.to_string(),
76 }
77 }
78
79 pub fn from_html(html: &str) -> Self {
81 let parsed = frontend::common::parser_tools::content_parser::parse_html(html);
82 parsed_blocks_to_fragment(parsed)
83 }
84
85 pub fn from_markdown(markdown: &str) -> Self {
87 let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
88 parsed_blocks_to_fragment(parsed)
89 }
90
91 pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
93 let inner = doc.inner.lock();
94 let char_count = {
95 let stats =
96 frontend::commands::document_inspection_commands::get_document_stats(&inner.ctx)?;
97 crate::convert::to_usize(stats.character_count)
98 };
99 let dto = frontend::document_inspection::ExtractFragmentDto {
100 position: 0,
101 anchor: crate::convert::to_i64(char_count),
102 };
103 let result =
104 frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
105 Ok(Self::from_raw(result.fragment_data, result.plain_text))
106 }
107
108 pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
110 Self { data, plain_text }
111 }
112
113 pub fn to_plain_text(&self) -> &str {
115 &self.plain_text
116 }
117
118 pub fn to_html(&self) -> String {
120 if self.data.is_empty() {
121 return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
122 }
123
124 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
125 Ok(d) => d,
126 Err(_) => {
127 return String::from(
128 "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
129 )
130 }
131 };
132
133 let mut body = String::new();
134 let blocks = &fragment_data.blocks;
135 let mut i = 0;
136
137 while i < blocks.len() {
138 let block = &blocks[i];
139
140 if let Some(ref list) = block.list {
141 let is_ordered = is_ordered_list_style(&list.style);
142 let list_tag = if is_ordered { "ol" } else { "ul" };
143 body.push('<');
144 body.push_str(list_tag);
145 body.push('>');
146
147 while i < blocks.len() {
148 let b = &blocks[i];
149 match &b.list {
150 Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
151 body.push_str("<li>");
152 push_inline_html(&mut body, &b.elements);
153 body.push_str("</li>");
154 i += 1;
155 }
156 _ => break,
157 }
158 }
159
160 body.push_str("</");
161 body.push_str(list_tag);
162 body.push('>');
163 } else if let Some(level) = block.heading_level {
164 let n = level.clamp(1, 6);
165 body.push_str(&format!("<h{}>", n));
166 push_inline_html(&mut body, &block.elements);
167 body.push_str(&format!("</h{}>", n));
168 i += 1;
169 } else {
170 let style = block_style_attr(block);
172 if style.is_empty() {
173 body.push_str("<p>");
174 } else {
175 body.push_str(&format!("<p style=\"{}\">", style));
176 }
177 push_inline_html(&mut body, &block.elements);
178 body.push_str("</p>");
179 i += 1;
180 }
181 }
182
183 format!(
184 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
185 body
186 )
187 }
188
189 pub fn to_markdown(&self) -> String {
191 if self.data.is_empty() {
192 return String::new();
193 }
194
195 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
196 Ok(d) => d,
197 Err(_) => return String::new(),
198 };
199
200 let mut parts: Vec<String> = Vec::new();
201 let mut prev_was_list = false;
202 let mut list_counter: u32 = 0;
203
204 for block in &fragment_data.blocks {
205 let inline_text = render_inline_markdown(&block.elements);
206 let is_list = block.list.is_some();
207
208 let indent_prefix = match block.indent {
210 Some(n) if n > 0 => " ".repeat(n as usize),
211 _ => String::new(),
212 };
213
214 if let Some(level) = block.heading_level {
215 let n = level.clamp(1, 6) as usize;
216 let prefix = "#".repeat(n);
217 parts.push(format!("{} {}", prefix, inline_text));
218 prev_was_list = false;
219 list_counter = 0;
220 } else if let Some(ref list) = block.list {
221 let is_ordered = is_ordered_list_style(&list.style);
222 if !prev_was_list {
223 list_counter = 0;
224 }
225 if is_ordered {
226 list_counter += 1;
227 parts.push(format!("{}{}. {}", indent_prefix, list_counter, inline_text));
228 } else {
229 parts.push(format!("{}- {}", indent_prefix, inline_text));
230 }
231 prev_was_list = true;
232 } else {
233 if indent_prefix.is_empty() {
235 parts.push(inline_text);
236 } else {
237 parts.push(format!("{}{}", indent_prefix, inline_text));
238 }
239 prev_was_list = false;
240 list_counter = 0;
241 }
242
243 if !is_list {
244 prev_was_list = false;
245 }
246 }
247
248 let mut result = String::new();
250 let blocks = &fragment_data.blocks;
251 for (idx, part) in parts.iter().enumerate() {
252 if idx > 0 {
253 let prev_is_list = blocks[idx - 1].list.is_some();
254 let curr_is_list = blocks[idx].list.is_some();
255 if prev_is_list && curr_is_list {
256 result.push('\n');
257 } else {
258 result.push_str("\n\n");
259 }
260 }
261 result.push_str(part);
262 }
263
264 result
265 }
266
267 pub fn is_empty(&self) -> bool {
269 self.plain_text.is_empty()
270 }
271
272 pub(crate) fn raw_data(&self) -> &str {
274 &self.data
275 }
276}
277
278impl Default for DocumentFragment {
279 fn default() -> Self {
280 Self::new()
281 }
282}
283
284fn is_ordered_list_style(style: &ListStyle) -> bool {
289 matches!(
290 style,
291 ListStyle::Decimal
292 | ListStyle::LowerAlpha
293 | ListStyle::UpperAlpha
294 | ListStyle::LowerRoman
295 | ListStyle::UpperRoman
296 )
297}
298
/// Escapes text for safe inclusion in HTML element content and in
/// double- or single-quoted attribute values.
///
/// The five characters with special meaning in HTML are replaced by
/// character references:
///
/// | input | output   |
/// |-------|----------|
/// | `&`   | `&amp;`  |
/// | `<`   | `&lt;`   |
/// | `>`   | `&gt;`   |
/// | `"`   | `&quot;` |
/// | `'`   | `&#39;`  |
///
/// All other characters are copied unchanged. An `&` that is already part of
/// a character reference in the input is escaped again; the function does not
/// try to detect pre-escaped text.
fn escape_html(s: &str) -> String {
    // The output is at least as long as the input; it grows only when
    // something needs escaping.
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            // Numeric reference: `&apos;` is not defined in HTML 4.
            '\'' => out.push_str("&#39;"),
            _ => out.push(c),
        }
    }
    out
}
315
316fn block_style_attr(block: &FragmentBlock) -> String {
318 use crate::Alignment;
319
320 let mut parts = Vec::new();
321 if let Some(ref alignment) = block.alignment {
322 let value = match alignment {
323 Alignment::Left => "left",
324 Alignment::Right => "right",
325 Alignment::Center => "center",
326 Alignment::Justify => "justify",
327 };
328 parts.push(format!("text-align: {}", value));
329 }
330 if let Some(n) = block.indent
331 && n > 0 {
332 parts.push(format!("margin-left: {}em", n));
333 }
334 if let Some(px) = block.text_indent
335 && px != 0 {
336 parts.push(format!("text-indent: {}px", px));
337 }
338 if let Some(px) = block.top_margin {
339 parts.push(format!("margin-top: {}px", px));
340 }
341 if let Some(px) = block.bottom_margin {
342 parts.push(format!("margin-bottom: {}px", px));
343 }
344 if let Some(px) = block.left_margin {
345 parts.push(format!("margin-left: {}px", px));
346 }
347 if let Some(px) = block.right_margin {
348 parts.push(format!("margin-right: {}px", px));
349 }
350 parts.join("; ")
351}
352
353fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
354 for elem in elements {
355 let text = match &elem.content {
356 InlineContent::Text(t) => escape_html(t),
357 InlineContent::Image {
358 name,
359 width,
360 height,
361 ..
362 } => {
363 format!(
364 "<img src=\"{}\" width=\"{}\" height=\"{}\">",
365 escape_html(name),
366 width,
367 height
368 )
369 }
370 InlineContent::Empty => String::new(),
371 };
372
373 let is_monospace = elem
374 .fmt_font_family
375 .as_deref()
376 .is_some_and(|f| f == "monospace");
377 let is_bold = elem.fmt_font_bold.unwrap_or(false);
378 let is_italic = elem.fmt_font_italic.unwrap_or(false);
379 let is_underline = elem.fmt_font_underline.unwrap_or(false);
380 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
381 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
382
383 let mut result = text;
384
385 if is_monospace {
386 result = format!("<code>{}</code>", result);
387 }
388 if is_bold {
389 result = format!("<strong>{}</strong>", result);
390 }
391 if is_italic {
392 result = format!("<em>{}</em>", result);
393 }
394 if is_underline {
395 result = format!("<u>{}</u>", result);
396 }
397 if is_strikeout {
398 result = format!("<s>{}</s>", result);
399 }
400 if is_anchor
401 && let Some(ref href) = elem.fmt_anchor_href {
402 result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
403 }
404
405 out.push_str(&result);
406 }
407}
408
/// Backslash-escapes Markdown punctuation so the text renders literally.
///
/// Each of the characters ``\ ` * _ { } [ ] ( ) # + - . ! | ~ < >`` is
/// preceded by a backslash; every other character is copied unchanged.
fn escape_markdown(s: &str) -> String {
    const SPECIAL: &[char] = &[
        '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!', '|', '~',
        '<', '>',
    ];

    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if SPECIAL.contains(&ch) {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
441
442fn render_inline_markdown(elements: &[FragmentElement]) -> String {
443 let mut out = String::new();
444 for elem in elements {
445 let raw_text = match &elem.content {
446 InlineContent::Text(t) => t.clone(),
447 InlineContent::Image { name, .. } => format!("", name, name),
448 InlineContent::Empty => String::new(),
449 };
450
451 let is_monospace = elem
452 .fmt_font_family
453 .as_deref()
454 .is_some_and(|f| f == "monospace");
455 let is_bold = elem.fmt_font_bold.unwrap_or(false);
456 let is_italic = elem.fmt_font_italic.unwrap_or(false);
457 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
458 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
459
460 if is_monospace {
461 out.push('`');
462 out.push_str(&raw_text);
463 out.push('`');
464 } else {
465 let mut text = escape_markdown(&raw_text);
466 if is_bold && is_italic {
467 text = format!("***{}***", text);
468 } else if is_bold {
469 text = format!("**{}**", text);
470 } else if is_italic {
471 text = format!("*{}*", text);
472 }
473 if is_strikeout {
474 text = format!("~~{}~~", text);
475 }
476 if is_anchor {
477 let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
478 out.push_str(&format!("[{}]({})", text, href));
479 } else {
480 out.push_str(&text);
481 }
482 }
483 }
484 out
485}
486
487fn parsed_blocks_to_fragment(
491 parsed: Vec<frontend::common::parser_tools::content_parser::ParsedBlock>,
492) -> DocumentFragment {
493 use frontend::common::parser_tools::fragment_schema::FragmentList;
494
495 let blocks: Vec<FragmentBlock> = parsed
496 .into_iter()
497 .map(|pb| {
498 let elements: Vec<FragmentElement> = pb
499 .spans
500 .iter()
501 .map(|span| {
502 let content = InlineContent::Text(span.text.clone());
503 let fmt_font_family = if span.code {
504 Some("monospace".into())
505 } else {
506 None
507 };
508 let fmt_font_bold = if span.bold { Some(true) } else { None };
509 let fmt_font_italic = if span.italic { Some(true) } else { None };
510 let fmt_font_underline = if span.underline { Some(true) } else { None };
511 let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
512 let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
513 (Some(href.clone()), Some(true))
514 } else {
515 (None, None)
516 };
517
518 FragmentElement {
519 content,
520 fmt_font_family,
521 fmt_font_point_size: None,
522 fmt_font_weight: None,
523 fmt_font_bold,
524 fmt_font_italic,
525 fmt_font_underline,
526 fmt_font_overline: None,
527 fmt_font_strikeout,
528 fmt_letter_spacing: None,
529 fmt_word_spacing: None,
530 fmt_anchor_href,
531 fmt_anchor_names: vec![],
532 fmt_is_anchor,
533 fmt_tooltip: None,
534 fmt_underline_style: None,
535 fmt_vertical_alignment: None,
536 }
537 })
538 .collect();
539
540 let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
541
542 let list = pb.list_style.map(|style| FragmentList {
543 style,
544 indent: 0,
545 prefix: String::new(),
546 suffix: String::new(),
547 });
548
549 FragmentBlock {
550 plain_text,
551 elements,
552 heading_level: pb.heading_level,
553 list,
554 alignment: None,
555 indent: None,
556 text_indent: None,
557 marker: None,
558 top_margin: None,
559 bottom_margin: None,
560 left_margin: None,
561 right_margin: None,
562 tab_positions: vec![],
563 }
564 })
565 .collect();
566
567 let data = serde_json::to_string(&FragmentData { blocks })
568 .expect("fragment serialization should not fail");
569
570 let plain_text = parsed_plain_text_from_data(&data);
571
572 DocumentFragment { data, plain_text }
573}
574
575fn parsed_plain_text_from_data(data: &str) -> String {
577 let fragment_data: FragmentData = match serde_json::from_str(data) {
578 Ok(d) => d,
579 Err(_) => return String::new(),
580 };
581
582 fragment_data
583 .blocks
584 .iter()
585 .map(|b| b.plain_text.as_str())
586 .collect::<Vec<_>>()
587 .join("\n")
588}