1use crate::{InlineContent, ListStyle};
4use frontend::common::parser_tools::fragment_schema::{
5 FragmentBlock, FragmentData, FragmentElement,
6};
7
/// A piece of rich-text content kept in two parallel forms.
///
/// `data` holds the serialized fragment payload (the constructors in this file
/// produce it by serializing a `FragmentData` to JSON) and `plain_text` holds
/// the unformatted text of the same content.
#[derive(Clone, Debug)]
pub struct DocumentFragment {
    /// Serialized fragment payload; empty for a fragment created by `new`.
    data: String,
    /// Unformatted text of the fragment.
    plain_text: String,
}
18
19impl DocumentFragment {
20 pub fn new() -> Self {
22 Self {
23 data: String::new(),
24 plain_text: String::new(),
25 }
26 }
27
28 pub fn from_plain_text(text: &str) -> Self {
33 let blocks: Vec<FragmentBlock> = text
34 .split('\n')
35 .map(|line| FragmentBlock {
36 plain_text: line.to_string(),
37 elements: vec![FragmentElement {
38 content: InlineContent::Text(line.to_string()),
39 fmt_font_family: None,
40 fmt_font_point_size: None,
41 fmt_font_weight: None,
42 fmt_font_bold: None,
43 fmt_font_italic: None,
44 fmt_font_underline: None,
45 fmt_font_overline: None,
46 fmt_font_strikeout: None,
47 fmt_letter_spacing: None,
48 fmt_word_spacing: None,
49 fmt_anchor_href: None,
50 fmt_anchor_names: vec![],
51 fmt_is_anchor: None,
52 fmt_tooltip: None,
53 fmt_underline_style: None,
54 fmt_vertical_alignment: None,
55 }],
56 heading_level: None,
57 list: None,
58 alignment: None,
59 indent: None,
60 text_indent: None,
61 marker: None,
62 top_margin: None,
63 bottom_margin: None,
64 left_margin: None,
65 right_margin: None,
66 tab_positions: vec![],
67 })
68 .collect();
69
70 let data = serde_json::to_string(&FragmentData { blocks })
71 .expect("fragment serialization should not fail");
72
73 Self {
74 data,
75 plain_text: text.to_string(),
76 }
77 }
78
79 pub fn from_html(html: &str) -> Self {
81 let parsed = frontend::common::parser_tools::content_parser::parse_html(html);
82 parsed_blocks_to_fragment(parsed)
83 }
84
85 pub fn from_markdown(markdown: &str) -> Self {
87 let parsed = frontend::common::parser_tools::content_parser::parse_markdown(markdown);
88 parsed_blocks_to_fragment(parsed)
89 }
90
91 pub fn from_document(doc: &crate::TextDocument) -> crate::Result<Self> {
93 let inner = doc.inner.lock();
94 let char_count = {
95 let stats =
96 frontend::commands::document_inspection_commands::get_document_stats(&inner.ctx)?;
97 crate::convert::to_usize(stats.character_count)
98 };
99 let dto = frontend::document_inspection::ExtractFragmentDto {
100 position: 0,
101 anchor: crate::convert::to_i64(char_count),
102 };
103 let result =
104 frontend::commands::document_inspection_commands::extract_fragment(&inner.ctx, &dto)?;
105 Ok(Self::from_raw(result.fragment_data, result.plain_text))
106 }
107
108 pub(crate) fn from_raw(data: String, plain_text: String) -> Self {
110 Self { data, plain_text }
111 }
112
113 pub fn to_plain_text(&self) -> &str {
115 &self.plain_text
116 }
117
118 pub fn to_html(&self) -> String {
120 if self.data.is_empty() {
121 return String::from("<html><head><meta charset=\"utf-8\"></head><body></body></html>");
122 }
123
124 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
125 Ok(d) => d,
126 Err(_) => {
127 return String::from(
128 "<html><head><meta charset=\"utf-8\"></head><body></body></html>",
129 );
130 }
131 };
132
133 let mut body = String::new();
134 let blocks = &fragment_data.blocks;
135 let mut i = 0;
136
137 while i < blocks.len() {
138 let block = &blocks[i];
139
140 if let Some(ref list) = block.list {
141 let is_ordered = is_ordered_list_style(&list.style);
142 let list_tag = if is_ordered { "ol" } else { "ul" };
143 body.push('<');
144 body.push_str(list_tag);
145 body.push('>');
146
147 while i < blocks.len() {
148 let b = &blocks[i];
149 match &b.list {
150 Some(l) if is_ordered_list_style(&l.style) == is_ordered => {
151 body.push_str("<li>");
152 push_inline_html(&mut body, &b.elements);
153 body.push_str("</li>");
154 i += 1;
155 }
156 _ => break,
157 }
158 }
159
160 body.push_str("</");
161 body.push_str(list_tag);
162 body.push('>');
163 } else if let Some(level) = block.heading_level {
164 let n = level.clamp(1, 6);
165 body.push_str(&format!("<h{}>", n));
166 push_inline_html(&mut body, &block.elements);
167 body.push_str(&format!("</h{}>", n));
168 i += 1;
169 } else {
170 let style = block_style_attr(block);
172 if style.is_empty() {
173 body.push_str("<p>");
174 } else {
175 body.push_str(&format!("<p style=\"{}\">", style));
176 }
177 push_inline_html(&mut body, &block.elements);
178 body.push_str("</p>");
179 i += 1;
180 }
181 }
182
183 format!(
184 "<html><head><meta charset=\"utf-8\"></head><body>{}</body></html>",
185 body
186 )
187 }
188
189 pub fn to_markdown(&self) -> String {
191 if self.data.is_empty() {
192 return String::new();
193 }
194
195 let fragment_data: FragmentData = match serde_json::from_str(&self.data) {
196 Ok(d) => d,
197 Err(_) => return String::new(),
198 };
199
200 let mut parts: Vec<String> = Vec::new();
201 let mut prev_was_list = false;
202 let mut list_counter: u32 = 0;
203
204 for block in &fragment_data.blocks {
205 let inline_text = render_inline_markdown(&block.elements);
206 let is_list = block.list.is_some();
207
208 let indent_prefix = match block.indent {
210 Some(n) if n > 0 => " ".repeat(n as usize),
211 _ => String::new(),
212 };
213
214 if let Some(level) = block.heading_level {
215 let n = level.clamp(1, 6) as usize;
216 let prefix = "#".repeat(n);
217 parts.push(format!("{} {}", prefix, inline_text));
218 prev_was_list = false;
219 list_counter = 0;
220 } else if let Some(ref list) = block.list {
221 let is_ordered = is_ordered_list_style(&list.style);
222 if !prev_was_list {
223 list_counter = 0;
224 }
225 if is_ordered {
226 list_counter += 1;
227 parts.push(format!(
228 "{}{}. {}",
229 indent_prefix, list_counter, inline_text
230 ));
231 } else {
232 parts.push(format!("{}- {}", indent_prefix, inline_text));
233 }
234 prev_was_list = true;
235 } else {
236 if indent_prefix.is_empty() {
238 parts.push(inline_text);
239 } else {
240 parts.push(format!("{}{}", indent_prefix, inline_text));
241 }
242 prev_was_list = false;
243 list_counter = 0;
244 }
245
246 if !is_list {
247 prev_was_list = false;
248 }
249 }
250
251 let mut result = String::new();
253 let blocks = &fragment_data.blocks;
254 for (idx, part) in parts.iter().enumerate() {
255 if idx > 0 {
256 let prev_is_list = blocks[idx - 1].list.is_some();
257 let curr_is_list = blocks[idx].list.is_some();
258 if prev_is_list && curr_is_list {
259 result.push('\n');
260 } else {
261 result.push_str("\n\n");
262 }
263 }
264 result.push_str(part);
265 }
266
267 result
268 }
269
270 pub fn is_empty(&self) -> bool {
272 self.plain_text.is_empty()
273 }
274
275 pub(crate) fn raw_data(&self) -> &str {
277 &self.data
278 }
279}
280
281impl Default for DocumentFragment {
282 fn default() -> Self {
283 Self::new()
284 }
285}
286
287fn is_ordered_list_style(style: &ListStyle) -> bool {
292 matches!(
293 style,
294 ListStyle::Decimal
295 | ListStyle::LowerAlpha
296 | ListStyle::UpperAlpha
297 | ListStyle::LowerRoman
298 | ListStyle::UpperRoman
299 )
300}
301
/// Escapes the characters that are significant in HTML text and in quoted
/// attribute values.
///
/// The ampersand, the two angle brackets and the double quote are replaced by
/// the named character references `amp`, `lt`, `gt` and `quot`; the single
/// quote is replaced by the numeric reference `#39`. Every other character is
/// copied through unchanged. Input that already contains references is escaped
/// again (the function never tries to detect existing references).
fn escape_html(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        // Body of the character reference: the part that goes between the
        // leading ampersand and the trailing semicolon.
        let reference = match c {
            '&' => "amp",
            '<' => "lt",
            '>' => "gt",
            '"' => "quot",
            '\'' => "#39",
            _ => {
                out.push(c);
                continue;
            }
        };
        out.push('&');
        out.push_str(reference);
        out.push(';');
    }
    out
}
318
319fn block_style_attr(block: &FragmentBlock) -> String {
321 use crate::Alignment;
322
323 let mut parts = Vec::new();
324 if let Some(ref alignment) = block.alignment {
325 let value = match alignment {
326 Alignment::Left => "left",
327 Alignment::Right => "right",
328 Alignment::Center => "center",
329 Alignment::Justify => "justify",
330 };
331 parts.push(format!("text-align: {}", value));
332 }
333 if let Some(n) = block.indent
334 && n > 0
335 {
336 parts.push(format!("margin-left: {}em", n));
337 }
338 if let Some(px) = block.text_indent
339 && px != 0
340 {
341 parts.push(format!("text-indent: {}px", px));
342 }
343 if let Some(px) = block.top_margin {
344 parts.push(format!("margin-top: {}px", px));
345 }
346 if let Some(px) = block.bottom_margin {
347 parts.push(format!("margin-bottom: {}px", px));
348 }
349 if let Some(px) = block.left_margin {
350 parts.push(format!("margin-left: {}px", px));
351 }
352 if let Some(px) = block.right_margin {
353 parts.push(format!("margin-right: {}px", px));
354 }
355 parts.join("; ")
356}
357
358fn push_inline_html(out: &mut String, elements: &[FragmentElement]) {
359 for elem in elements {
360 let text = match &elem.content {
361 InlineContent::Text(t) => escape_html(t),
362 InlineContent::Image {
363 name,
364 width,
365 height,
366 ..
367 } => {
368 format!(
369 "<img src=\"{}\" width=\"{}\" height=\"{}\">",
370 escape_html(name),
371 width,
372 height
373 )
374 }
375 InlineContent::Empty => String::new(),
376 };
377
378 let is_monospace = elem
379 .fmt_font_family
380 .as_deref()
381 .is_some_and(|f| f == "monospace");
382 let is_bold = elem.fmt_font_bold.unwrap_or(false);
383 let is_italic = elem.fmt_font_italic.unwrap_or(false);
384 let is_underline = elem.fmt_font_underline.unwrap_or(false);
385 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
386 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
387
388 let mut result = text;
389
390 if is_monospace {
391 result = format!("<code>{}</code>", result);
392 }
393 if is_bold {
394 result = format!("<strong>{}</strong>", result);
395 }
396 if is_italic {
397 result = format!("<em>{}</em>", result);
398 }
399 if is_underline {
400 result = format!("<u>{}</u>", result);
401 }
402 if is_strikeout {
403 result = format!("<s>{}</s>", result);
404 }
405 if is_anchor && let Some(ref href) = elem.fmt_anchor_href {
406 result = format!("<a href=\"{}\">{}</a>", escape_html(href), result);
407 }
408
409 out.push_str(&result);
410 }
411}
412
/// Backslash-escapes every character that can carry Markdown meaning.
///
/// The escaped set is: backslash, backtick, `*`, `_`, `{`, `}`, `[`, `]`, `(`,
/// `)`, `#`, `+`, `-`, `.`, `!`, `|`, `~`, `<` and `>`. Escaping is
/// unconditional, i.e. it does not depend on the character's position.
fn escape_markdown(s: &str) -> String {
    const NEEDS_ESCAPE: &[char] = &[
        '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!', '|', '~',
        '<', '>',
    ];

    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            if NEEDS_ESCAPE.contains(&ch) {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
445
446fn render_inline_markdown(elements: &[FragmentElement]) -> String {
447 let mut out = String::new();
448 for elem in elements {
449 let raw_text = match &elem.content {
450 InlineContent::Text(t) => t.clone(),
451 InlineContent::Image { name, .. } => format!("", name, name),
452 InlineContent::Empty => String::new(),
453 };
454
455 let is_monospace = elem
456 .fmt_font_family
457 .as_deref()
458 .is_some_and(|f| f == "monospace");
459 let is_bold = elem.fmt_font_bold.unwrap_or(false);
460 let is_italic = elem.fmt_font_italic.unwrap_or(false);
461 let is_strikeout = elem.fmt_font_strikeout.unwrap_or(false);
462 let is_anchor = elem.fmt_is_anchor.unwrap_or(false);
463
464 if is_monospace {
465 out.push('`');
466 out.push_str(&raw_text);
467 out.push('`');
468 } else {
469 let mut text = escape_markdown(&raw_text);
470 if is_bold && is_italic {
471 text = format!("***{}***", text);
472 } else if is_bold {
473 text = format!("**{}**", text);
474 } else if is_italic {
475 text = format!("*{}*", text);
476 }
477 if is_strikeout {
478 text = format!("~~{}~~", text);
479 }
480 if is_anchor {
481 let href = elem.fmt_anchor_href.as_deref().unwrap_or("");
482 out.push_str(&format!("[{}]({})", text, href));
483 } else {
484 out.push_str(&text);
485 }
486 }
487 }
488 out
489}
490
491fn parsed_blocks_to_fragment(
495 parsed: Vec<frontend::common::parser_tools::content_parser::ParsedBlock>,
496) -> DocumentFragment {
497 use frontend::common::parser_tools::fragment_schema::FragmentList;
498
499 let blocks: Vec<FragmentBlock> = parsed
500 .into_iter()
501 .map(|pb| {
502 let elements: Vec<FragmentElement> = pb
503 .spans
504 .iter()
505 .map(|span| {
506 let content = InlineContent::Text(span.text.clone());
507 let fmt_font_family = if span.code {
508 Some("monospace".into())
509 } else {
510 None
511 };
512 let fmt_font_bold = if span.bold { Some(true) } else { None };
513 let fmt_font_italic = if span.italic { Some(true) } else { None };
514 let fmt_font_underline = if span.underline { Some(true) } else { None };
515 let fmt_font_strikeout = if span.strikeout { Some(true) } else { None };
516 let (fmt_anchor_href, fmt_is_anchor) = if let Some(ref href) = span.link_href {
517 (Some(href.clone()), Some(true))
518 } else {
519 (None, None)
520 };
521
522 FragmentElement {
523 content,
524 fmt_font_family,
525 fmt_font_point_size: None,
526 fmt_font_weight: None,
527 fmt_font_bold,
528 fmt_font_italic,
529 fmt_font_underline,
530 fmt_font_overline: None,
531 fmt_font_strikeout,
532 fmt_letter_spacing: None,
533 fmt_word_spacing: None,
534 fmt_anchor_href,
535 fmt_anchor_names: vec![],
536 fmt_is_anchor,
537 fmt_tooltip: None,
538 fmt_underline_style: None,
539 fmt_vertical_alignment: None,
540 }
541 })
542 .collect();
543
544 let plain_text: String = pb.spans.iter().map(|s| s.text.as_str()).collect();
545
546 let list = pb.list_style.map(|style| FragmentList {
547 style,
548 indent: 0,
549 prefix: String::new(),
550 suffix: String::new(),
551 });
552
553 FragmentBlock {
554 plain_text,
555 elements,
556 heading_level: pb.heading_level,
557 list,
558 alignment: None,
559 indent: None,
560 text_indent: None,
561 marker: None,
562 top_margin: None,
563 bottom_margin: None,
564 left_margin: None,
565 right_margin: None,
566 tab_positions: vec![],
567 }
568 })
569 .collect();
570
571 let data = serde_json::to_string(&FragmentData { blocks })
572 .expect("fragment serialization should not fail");
573
574 let plain_text = parsed_plain_text_from_data(&data);
575
576 DocumentFragment { data, plain_text }
577}
578
579fn parsed_plain_text_from_data(data: &str) -> String {
581 let fragment_data: FragmentData = match serde_json::from_str(data) {
582 Ok(d) => d,
583 Err(_) => return String::new(),
584 };
585
586 fragment_data
587 .blocks
588 .iter()
589 .map(|b| b.plain_text.as_str())
590 .collect::<Vec<_>>()
591 .join("\n")
592}