1use crate::entities::{ListStyle, TextDirection};
2
/// A run of text that shares one set of inline formatting attributes.
#[derive(Debug, Clone, Default)]
pub struct ParsedSpan {
    /// The literal text of the run.
    pub text: String,
    /// Set inside `**strong**` (Markdown) or `<b>`/`<strong>` (HTML).
    pub bold: bool,
    /// Set inside `*emphasis*` (Markdown) or `<i>`/`<em>` (HTML).
    pub italic: bool,
    /// Set inside `<u>`/`<ins>` (HTML); the Markdown parser always leaves it `false`.
    pub underline: bool,
    /// Set inside `~~strikethrough~~` (Markdown) or `<s>`/`<del>`/`<strike>` (HTML).
    pub strikeout: bool,
    /// Set for inline code and for text that belongs to a code block.
    pub code: bool,
    /// Link destination when the run sits inside a hyperlink.
    pub link_href: Option<String>,
}
14
/// One table cell, reduced to its inline content.
#[derive(Debug, Clone)]
pub struct ParsedTableCell {
    /// Formatted runs that make up the cell text, in document order.
    pub spans: Vec<ParsedSpan>,
}
20
/// A table produced by either parser.
#[derive(Debug, Clone)]
pub struct ParsedTable {
    /// Number of rows in `rows` that were recognised as header rows.
    // NOTE(review): the Markdown parser always stores the header row first;
    // the HTML parser counts `<thead>` rows wherever they occur — confirm
    // whether consumers assume the header rows are the leading ones.
    pub header_rows: usize,
    /// All rows (header rows included), each a list of cells.
    pub rows: Vec<Vec<ParsedTableCell>>,
}
29
/// A top-level item emitted by the parsers: a text block or a whole table.
#[derive(Debug, Clone)]
pub enum ParsedElement {
    /// A paragraph-like block (paragraph, heading, list item, code block, ...).
    Block(ParsedBlock),
    /// A table with its rows and cells.
    Table(ParsedTable),
}
36
37impl ParsedElement {
38 pub fn flatten_to_blocks(elements: Vec<ParsedElement>) -> Vec<ParsedBlock> {
41 let mut blocks = Vec::new();
42 for elem in elements {
43 match elem {
44 ParsedElement::Block(b) => blocks.push(b),
45 ParsedElement::Table(t) => {
46 for row in t.rows {
47 for cell in row {
48 blocks.push(ParsedBlock {
49 spans: cell.spans,
50 heading_level: None,
51 list_style: None,
52 list_indent: 0,
53 is_code_block: false,
54 code_language: None,
55 blockquote_depth: 0,
56 line_height: None,
57 non_breakable_lines: None,
58 direction: None,
59 background_color: None,
60 });
61 }
62 }
63 }
64 }
65 }
66 if blocks.is_empty() {
67 blocks.push(ParsedBlock {
68 spans: vec![ParsedSpan {
69 text: String::new(),
70 ..Default::default()
71 }],
72 heading_level: None,
73 list_style: None,
74 list_indent: 0,
75 is_code_block: false,
76 code_language: None,
77 blockquote_depth: 0,
78 line_height: None,
79 non_breakable_lines: None,
80 direction: None,
81 background_color: None,
82 });
83 }
84 blocks
85 }
86}
87
/// A paragraph-like unit of content together with its block-level formatting.
#[derive(Debug, Clone)]
pub struct ParsedBlock {
    /// Inline runs that make up the block, in document order.
    pub spans: Vec<ParsedSpan>,
    /// Heading level 1-6, or `None` when the block is not a heading.
    pub heading_level: Option<i64>,
    /// List marker style when the block is a list item.
    pub list_style: Option<ListStyle>,
    /// Zero-based nesting depth of the list item (0 for a top-level item).
    pub list_indent: u32,
    /// `true` for fenced/indented Markdown code blocks and HTML `<pre>`.
    pub is_code_block: bool,
    /// Language tag of a code block (fence info string or `language-*` class).
    pub code_language: Option<String>,
    /// Number of enclosing blockquotes.
    pub blockquote_depth: u32,
    /// CSS `line-height` multiplier scaled by 1000 (e.g. `1.5` is stored as 1500).
    pub line_height: Option<i64>,
    /// `Some(true)` when CSS `white-space` was `pre`, `nowrap` or `pre-wrap`.
    pub non_breakable_lines: Option<bool>,
    /// Text direction taken from the CSS `direction` property.
    pub direction: Option<TextDirection>,
    /// Raw value of the CSS `background-color` / `background` property.
    pub background_color: Option<String>,
}
103
104impl ParsedBlock {
105 pub fn is_inline_only(&self) -> bool {
108 self.heading_level.is_none()
109 && self.list_style.is_none()
110 && !self.is_code_block
111 && self.blockquote_depth == 0
112 && self.line_height.is_none()
113 && self.non_breakable_lines.is_none()
114 && self.direction.is_none()
115 && self.background_color.is_none()
116 }
117}
118
119pub fn parse_markdown(markdown: &str) -> Vec<ParsedElement> {
122 use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
123
124 let options =
125 Options::ENABLE_STRIKETHROUGH | Options::ENABLE_TABLES | Options::ENABLE_TASKLISTS;
126 let parser = Parser::new_ext(markdown, options);
127
128 let mut elements: Vec<ParsedElement> = Vec::new();
129 let mut current_spans: Vec<ParsedSpan> = Vec::new();
130 let mut current_heading: Option<i64> = None;
131 let mut current_list_style: Option<ListStyle> = None;
132 let mut is_code_block = false;
133 let mut code_language: Option<String> = None;
134 let mut blockquote_depth: u32 = 0;
135 let mut in_block = false;
136
137 let mut bold = false;
139 let mut italic = false;
140 let mut strikeout = false;
141 let mut link_href: Option<String> = None;
142
143 let mut list_stack: Vec<Option<ListStyle>> = Vec::new();
145 let mut current_list_indent: u32 = 0;
146
147 let mut in_table = false;
149 let mut in_table_head = false;
150 let mut table_rows: Vec<Vec<ParsedTableCell>> = Vec::new();
151 let mut current_row_cells: Vec<ParsedTableCell> = Vec::new();
152 let mut current_cell_spans: Vec<ParsedSpan> = Vec::new();
153 let mut table_header_rows: usize = 0;
154
155 for event in parser {
156 match event {
157 Event::Start(Tag::Paragraph) => {
158 in_block = true;
159 current_heading = None;
160 is_code_block = false;
161 }
162 Event::End(TagEnd::Paragraph) => {
163 if !current_spans.is_empty() || in_block {
164 elements.push(ParsedElement::Block(ParsedBlock {
165 spans: std::mem::take(&mut current_spans),
166 heading_level: current_heading.take(),
167 list_style: current_list_style.clone(),
168 list_indent: current_list_indent,
169 is_code_block: false,
170 code_language: None,
171 blockquote_depth,
172 line_height: None,
173 non_breakable_lines: None,
174 direction: None,
175 background_color: None,
176 }));
177 }
178 in_block = false;
179 current_list_style = None;
180 }
181 Event::Start(Tag::Heading { level, .. }) => {
182 in_block = true;
183 current_heading = Some(heading_level_to_i64(level));
184 is_code_block = false;
185 }
186 Event::End(TagEnd::Heading(_)) => {
187 elements.push(ParsedElement::Block(ParsedBlock {
188 spans: std::mem::take(&mut current_spans),
189 heading_level: current_heading.take(),
190 list_style: None,
191 list_indent: 0,
192 is_code_block: false,
193 code_language: None,
194 blockquote_depth,
195 line_height: None,
196 non_breakable_lines: None,
197 direction: None,
198 background_color: None,
199 }));
200 in_block = false;
201 }
202 Event::Start(Tag::List(ordered)) => {
203 let style = if ordered.is_some() {
204 Some(ListStyle::Decimal)
205 } else {
206 Some(ListStyle::Disc)
207 };
208 list_stack.push(style);
209 }
210 Event::End(TagEnd::List(_)) => {
211 list_stack.pop();
212 }
213 Event::Start(Tag::Item) => {
214 if !current_spans.is_empty() {
217 elements.push(ParsedElement::Block(ParsedBlock {
218 spans: std::mem::take(&mut current_spans),
219 heading_level: None,
220 list_style: current_list_style.clone(),
221 list_indent: current_list_indent,
222 is_code_block: false,
223 code_language: None,
224 blockquote_depth,
225 line_height: None,
226 non_breakable_lines: None,
227 direction: None,
228 background_color: None,
229 }));
230 }
231 in_block = true;
232 current_list_style = list_stack.last().cloned().flatten();
233 current_list_indent = if list_stack.is_empty() {
234 0
235 } else {
236 (list_stack.len() - 1) as u32
237 };
238 }
239 Event::End(TagEnd::Item) => {
240 if !current_spans.is_empty() {
243 elements.push(ParsedElement::Block(ParsedBlock {
244 spans: std::mem::take(&mut current_spans),
245 heading_level: None,
246 list_style: current_list_style.clone(),
247 list_indent: current_list_indent,
248 is_code_block: false,
249 code_language: None,
250 blockquote_depth,
251 line_height: None,
252 non_breakable_lines: None,
253 direction: None,
254 background_color: None,
255 }));
256 }
257 in_block = false;
258 current_list_style = None;
259 }
260 Event::Start(Tag::CodeBlock(kind)) => {
261 in_block = true;
262 is_code_block = true;
263 code_language = match &kind {
264 pulldown_cmark::CodeBlockKind::Fenced(lang) if !lang.is_empty() => {
265 Some(lang.to_string())
266 }
267 _ => None,
268 };
269 }
270 Event::End(TagEnd::CodeBlock) => {
271 if let Some(last) = current_spans.last_mut()
273 && last.text.ends_with('\n')
274 {
275 last.text.truncate(last.text.len() - 1);
276 }
277 elements.push(ParsedElement::Block(ParsedBlock {
278 spans: std::mem::take(&mut current_spans),
279 heading_level: None,
280 list_style: None,
281 list_indent: 0,
282 is_code_block: true,
283 code_language: code_language.take(),
284 blockquote_depth,
285 line_height: None,
286 non_breakable_lines: None,
287 direction: None,
288 background_color: None,
289 }));
290 in_block = false;
291 is_code_block = false;
292 }
293 Event::Start(Tag::Table(_)) => {
295 in_table = true;
296 in_table_head = false;
297 table_rows.clear();
298 current_row_cells.clear();
299 current_cell_spans.clear();
300 table_header_rows = 0;
301 }
302 Event::End(TagEnd::Table) => {
303 elements.push(ParsedElement::Table(ParsedTable {
304 header_rows: table_header_rows,
305 rows: std::mem::take(&mut table_rows),
306 }));
307 in_table = false;
308 }
309 Event::Start(Tag::TableHead) => {
310 in_table_head = true;
311 current_row_cells.clear();
312 }
313 Event::End(TagEnd::TableHead) => {
314 table_rows.push(std::mem::take(&mut current_row_cells));
316 table_header_rows += 1;
317 in_table_head = false;
318 }
319 Event::Start(Tag::TableRow) => {
320 current_row_cells.clear();
321 }
322 Event::End(TagEnd::TableRow) => {
323 if !in_table_head {
325 table_rows.push(std::mem::take(&mut current_row_cells));
326 }
327 }
328 Event::Start(Tag::TableCell) => {
329 current_cell_spans.clear();
330 }
331 Event::End(TagEnd::TableCell) => {
332 current_row_cells.push(ParsedTableCell {
333 spans: std::mem::take(&mut current_cell_spans),
334 });
335 }
336 Event::Start(Tag::Emphasis) => {
338 italic = true;
339 }
340 Event::End(TagEnd::Emphasis) => {
341 italic = false;
342 }
343 Event::Start(Tag::Strong) => {
344 bold = true;
345 }
346 Event::End(TagEnd::Strong) => {
347 bold = false;
348 }
349 Event::Start(Tag::Strikethrough) => {
350 strikeout = true;
351 }
352 Event::End(TagEnd::Strikethrough) => {
353 strikeout = false;
354 }
355 Event::Start(Tag::Link { dest_url, .. }) => {
356 link_href = Some(dest_url.to_string());
357 }
358 Event::End(TagEnd::Link) => {
359 link_href = None;
360 }
361 Event::Text(text) => {
362 let span = ParsedSpan {
363 text: text.to_string(),
364 bold,
365 italic,
366 underline: false,
367 strikeout,
368 code: is_code_block,
369 link_href: link_href.clone(),
370 };
371 if in_table {
372 current_cell_spans.push(span);
373 } else {
374 if !in_block {
375 in_block = true;
376 }
377 current_spans.push(span);
378 }
379 }
380 Event::Code(text) => {
381 let span = ParsedSpan {
382 text: text.to_string(),
383 bold,
384 italic,
385 underline: false,
386 strikeout,
387 code: true,
388 link_href: link_href.clone(),
389 };
390 if in_table {
391 current_cell_spans.push(span);
392 } else {
393 if !in_block {
394 in_block = true;
395 }
396 current_spans.push(span);
397 }
398 }
399 Event::SoftBreak => {
400 let span = ParsedSpan {
401 text: " ".to_string(),
402 bold,
403 italic,
404 underline: false,
405 strikeout,
406 code: false,
407 link_href: link_href.clone(),
408 };
409 if in_table {
410 current_cell_spans.push(span);
411 } else {
412 current_spans.push(span);
413 }
414 }
415 Event::HardBreak => {
416 if !current_spans.is_empty() || in_block {
418 elements.push(ParsedElement::Block(ParsedBlock {
419 spans: std::mem::take(&mut current_spans),
420 heading_level: current_heading.take(),
421 list_style: current_list_style.clone(),
422 list_indent: current_list_indent,
423 is_code_block,
424 code_language: code_language.clone(),
425 blockquote_depth,
426 line_height: None,
427 non_breakable_lines: None,
428 direction: None,
429 background_color: None,
430 }));
431 }
432 }
433 Event::Start(Tag::BlockQuote(_)) => {
434 blockquote_depth += 1;
435 }
436 Event::End(TagEnd::BlockQuote(_)) => {
437 blockquote_depth = blockquote_depth.saturating_sub(1);
438 }
439 _ => {}
440 }
441 }
442
443 if !current_spans.is_empty() {
445 elements.push(ParsedElement::Block(ParsedBlock {
446 spans: std::mem::take(&mut current_spans),
447 heading_level: current_heading,
448 list_style: current_list_style,
449 list_indent: current_list_indent,
450 is_code_block,
451 code_language: code_language.take(),
452 blockquote_depth,
453 line_height: None,
454 non_breakable_lines: None,
455 direction: None,
456 background_color: None,
457 }));
458 }
459
460 if elements.is_empty() {
462 elements.push(ParsedElement::Block(ParsedBlock {
463 spans: vec![ParsedSpan {
464 text: String::new(),
465 ..Default::default()
466 }],
467 heading_level: None,
468 list_style: None,
469 list_indent: 0,
470 is_code_block: false,
471 code_language: None,
472 blockquote_depth: 0,
473 line_height: None,
474 non_breakable_lines: None,
475 direction: None,
476 background_color: None,
477 }));
478 }
479
480 elements
481}
482
483fn heading_level_to_i64(level: pulldown_cmark::HeadingLevel) -> i64 {
484 use pulldown_cmark::HeadingLevel;
485 match level {
486 HeadingLevel::H1 => 1,
487 HeadingLevel::H2 => 2,
488 HeadingLevel::H3 => 3,
489 HeadingLevel::H4 => 4,
490 HeadingLevel::H5 => 5,
491 HeadingLevel::H6 => 6,
492 }
493}
494
495use scraper::Node;
498
/// Block-level properties extracted from an inline CSS `style` attribute.
#[derive(Debug, Clone, Default)]
struct BlockStyles {
    /// `line-height` multiplier scaled by 1000.
    line_height: Option<i64>,
    /// `Some(true)` when `white-space` is `pre`, `nowrap` or `pre-wrap`.
    non_breakable_lines: Option<bool>,
    /// Value of the `direction` property (`rtl` / `ltr`).
    direction: Option<TextDirection>,
    /// Raw value of `background-color` / `background`.
    background_color: Option<String>,
}
507
508fn parse_block_styles(style: &str) -> BlockStyles {
511 let mut result = BlockStyles::default();
512 for part in style.split(';') {
513 let part = part.trim();
514 if let Some((prop, val)) = part.split_once(':') {
515 let prop = prop.trim().to_ascii_lowercase();
516 let val = val.trim();
517 match prop.as_str() {
518 "line-height" => {
519 if let Ok(v) = val.parse::<f64>() {
521 result.line_height = Some((v * 1000.0) as i64);
522 }
523 }
524 "white-space" => {
525 if val == "pre" || val == "nowrap" || val == "pre-wrap" {
526 result.non_breakable_lines = Some(true);
527 }
528 }
529 "direction" => {
530 if val.eq_ignore_ascii_case("rtl") {
531 result.direction = Some(TextDirection::RightToLeft);
532 } else if val.eq_ignore_ascii_case("ltr") {
533 result.direction = Some(TextDirection::LeftToRight);
534 }
535 }
536 "background-color" | "background" => {
537 result.background_color = Some(val.to_string());
538 }
539 _ => {}
540 }
541 }
542 }
543 result
544}
545
546pub fn parse_html(html: &str) -> Vec<ParsedBlock> {
547 ParsedElement::flatten_to_blocks(parse_html_elements(html))
548}
549
550pub fn parse_html_elements(html: &str) -> Vec<ParsedElement> {
551 use scraper::Html;
552
553 let fragment = Html::parse_fragment(html);
554 let mut elements: Vec<ParsedElement> = Vec::new();
555
556 let root = fragment.root_element();
558
559 #[derive(Clone, Default)]
560 struct FmtState {
561 bold: bool,
562 italic: bool,
563 underline: bool,
564 strikeout: bool,
565 code: bool,
566 link_href: Option<String>,
567 }
568
569 const MAX_RECURSION_DEPTH: usize = 256;
570
571 fn collect_cell_spans(
573 node: ego_tree::NodeRef<Node>,
574 state: &FmtState,
575 spans: &mut Vec<ParsedSpan>,
576 depth: usize,
577 ) {
578 if depth > MAX_RECURSION_DEPTH {
579 return;
580 }
581 for child in node.children() {
582 match child.value() {
583 Node::Text(text) => {
584 let t = text.text.to_string();
585 if !t.is_empty() {
586 spans.push(ParsedSpan {
587 text: t,
588 bold: state.bold,
589 italic: state.italic,
590 underline: state.underline,
591 strikeout: state.strikeout,
592 code: state.code,
593 link_href: state.link_href.clone(),
594 });
595 }
596 }
597 Node::Element(el) => {
598 let tag = el.name();
599 let mut new_state = state.clone();
600 match tag {
601 "b" | "strong" => new_state.bold = true,
602 "i" | "em" => new_state.italic = true,
603 "u" | "ins" => new_state.underline = true,
604 "s" | "del" | "strike" => new_state.strikeout = true,
605 "code" => new_state.code = true,
606 "a" => {
607 if let Some(href) = el.attr("href") {
608 new_state.link_href = Some(href.to_string());
609 }
610 }
611 _ => {}
612 }
613 collect_cell_spans(child, &new_state, spans, depth + 1);
614 }
615 _ => {}
616 }
617 }
618 }
619
620 fn parse_table_element(table_node: ego_tree::NodeRef<Node>) -> ParsedTable {
622 let mut rows: Vec<Vec<ParsedTableCell>> = Vec::new();
623 let mut header_rows: usize = 0;
624
625 fn collect_rows(
626 node: ego_tree::NodeRef<Node>,
627 rows: &mut Vec<Vec<ParsedTableCell>>,
628 header_rows: &mut usize,
629 in_thead: bool,
630 ) {
631 for child in node.children() {
632 if let Node::Element(el) = child.value() {
633 match el.name() {
634 "thead" => collect_rows(child, rows, header_rows, true),
635 "tbody" | "tfoot" => collect_rows(child, rows, header_rows, false),
636 "tr" => {
637 let mut cells: Vec<ParsedTableCell> = Vec::new();
638 for td in child.children() {
639 if let Node::Element(td_el) = td.value()
640 && matches!(td_el.name(), "td" | "th")
641 {
642 let mut spans = Vec::new();
643 let state = FmtState::default();
644 collect_cell_spans(td, &state, &mut spans, 0);
645 if spans.is_empty() {
646 spans.push(ParsedSpan::default());
647 }
648 cells.push(ParsedTableCell { spans });
649 }
650 }
651 if !cells.is_empty() {
652 rows.push(cells);
653 if in_thead {
654 *header_rows += 1;
655 }
656 }
657 }
658 _ => {}
659 }
660 }
661 }
662 }
663
664 collect_rows(table_node, &mut rows, &mut header_rows, false);
665
666 if header_rows == 0 && !rows.is_empty() {
668 header_rows = 1;
669 }
670
671 ParsedTable { header_rows, rows }
672 }
673
674 fn walk_node(
675 node: ego_tree::NodeRef<Node>,
676 state: &FmtState,
677 elements: &mut Vec<ParsedElement>,
678 current_list_style: &Option<ListStyle>,
679 blockquote_depth: u32,
680 list_depth: u32,
681 depth: usize,
682 ) {
683 if depth > MAX_RECURSION_DEPTH {
684 return;
685 }
686 match node.value() {
687 Node::Element(el) => {
688 let tag = el.name();
689 let mut new_state = state.clone();
690 let mut new_list_style = current_list_style.clone();
691 let mut bq_depth = blockquote_depth;
692 let mut new_list_depth = list_depth;
693
694 let is_block_tag = matches!(
696 tag,
697 "p" | "div"
698 | "h1"
699 | "h2"
700 | "h3"
701 | "h4"
702 | "h5"
703 | "h6"
704 | "li"
705 | "pre"
706 | "br"
707 | "blockquote"
708 );
709
710 match tag {
712 "b" | "strong" => new_state.bold = true,
713 "i" | "em" => new_state.italic = true,
714 "u" | "ins" => new_state.underline = true,
715 "s" | "del" | "strike" => new_state.strikeout = true,
716 "code" => new_state.code = true,
717 "a" => {
718 if let Some(href) = el.attr("href") {
719 new_state.link_href = Some(href.to_string());
720 }
721 }
722 "ul" => {
723 new_list_style = Some(ListStyle::Disc);
724 new_list_depth = list_depth + 1;
725 }
726 "ol" => {
727 new_list_style = Some(ListStyle::Decimal);
728 new_list_depth = list_depth + 1;
729 }
730 "blockquote" => {
731 bq_depth += 1;
732 }
733 _ => {}
734 }
735
736 let heading_level = match tag {
738 "h1" => Some(1),
739 "h2" => Some(2),
740 "h3" => Some(3),
741 "h4" => Some(4),
742 "h5" => Some(5),
743 "h6" => Some(6),
744 _ => None,
745 };
746
747 let is_code_block = tag == "pre";
748
749 let code_language = if is_code_block {
751 node.children().find_map(|child| {
752 if let Node::Element(cel) = child.value()
753 && cel.name() == "code"
754 && let Some(cls) = cel.attr("class")
755 {
756 return cls
757 .split_whitespace()
758 .find_map(|c| c.strip_prefix("language-"))
759 .map(|l| l.to_string());
760 }
761 None
762 })
763 } else {
764 None
765 };
766
767 let css = if is_block_tag {
769 el.attr("style").map(parse_block_styles).unwrap_or_default()
770 } else {
771 BlockStyles::default()
772 };
773
774 if tag == "table" {
775 let parsed_table = parse_table_element(node);
777 if !parsed_table.rows.is_empty() {
778 elements.push(ParsedElement::Table(parsed_table));
779 }
780 return;
781 }
782
783 if tag == "br" {
784 elements.push(ParsedElement::Block(ParsedBlock {
786 spans: vec![ParsedSpan {
787 text: String::new(),
788 ..Default::default()
789 }],
790 heading_level: None,
791 list_style: None,
792 list_indent: 0,
793 is_code_block: false,
794 code_language: None,
795 blockquote_depth: bq_depth,
796 line_height: None,
797 non_breakable_lines: None,
798 direction: None,
799 background_color: None,
800 }));
801 return;
802 }
803
804 if tag == "blockquote" {
805 for child in node.children() {
807 walk_node(
808 child,
809 &new_state,
810 elements,
811 &new_list_style,
812 bq_depth,
813 new_list_depth,
814 depth + 1,
815 );
816 }
817 } else if is_block_tag && tag != "br" {
818 let mut spans: Vec<ParsedSpan> = Vec::new();
820 collect_inline_spans(
821 node,
822 &new_state,
823 &mut spans,
824 &new_list_style,
825 elements,
826 bq_depth,
827 new_list_depth,
828 depth + 1,
829 );
830
831 let list_style_for_block = if tag == "li" {
832 new_list_style.clone()
833 } else {
834 None
835 };
836
837 let list_indent_for_block = if tag == "li" {
838 new_list_depth.saturating_sub(1)
839 } else {
840 0
841 };
842
843 if !spans.is_empty() || heading_level.is_some() {
844 elements.push(ParsedElement::Block(ParsedBlock {
845 spans,
846 heading_level,
847 list_style: list_style_for_block,
848 list_indent: list_indent_for_block,
849 is_code_block,
850 code_language,
851 blockquote_depth: bq_depth,
852 line_height: css.line_height,
853 non_breakable_lines: css.non_breakable_lines,
854 direction: css.direction,
855 background_color: css.background_color,
856 }));
857 }
858 } else if matches!(tag, "ul" | "ol" | "thead" | "tbody" | "tr") {
859 for child in node.children() {
861 walk_node(
862 child,
863 &new_state,
864 elements,
865 &new_list_style,
866 bq_depth,
867 new_list_depth,
868 depth + 1,
869 );
870 }
871 } else {
872 for child in node.children() {
874 walk_node(
875 child,
876 &new_state,
877 elements,
878 current_list_style,
879 bq_depth,
880 list_depth,
881 depth + 1,
882 );
883 }
884 }
885 }
886 Node::Text(text) => {
887 let t = text.text.to_string();
888 let trimmed = t.trim();
889 if !trimmed.is_empty() {
890 elements.push(ParsedElement::Block(ParsedBlock {
892 spans: vec![ParsedSpan {
893 text: trimmed.to_string(),
894 bold: state.bold,
895 italic: state.italic,
896 underline: state.underline,
897 strikeout: state.strikeout,
898 code: state.code,
899 link_href: state.link_href.clone(),
900 }],
901 heading_level: None,
902 list_style: None,
903 list_indent: 0,
904 is_code_block: false,
905 code_language: None,
906 blockquote_depth,
907 line_height: None,
908 non_breakable_lines: None,
909 direction: None,
910 background_color: None,
911 }));
912 }
913 }
914 _ => {
915 for child in node.children() {
917 walk_node(
918 child,
919 state,
920 elements,
921 current_list_style,
922 blockquote_depth,
923 list_depth,
924 depth + 1,
925 );
926 }
927 }
928 }
929 }
930
931 #[allow(clippy::too_many_arguments)]
935 fn collect_inline_spans(
936 node: ego_tree::NodeRef<Node>,
937 state: &FmtState,
938 spans: &mut Vec<ParsedSpan>,
939 current_list_style: &Option<ListStyle>,
940 elements: &mut Vec<ParsedElement>,
941 blockquote_depth: u32,
942 list_depth: u32,
943 depth: usize,
944 ) {
945 if depth > MAX_RECURSION_DEPTH {
946 return;
947 }
948 for child in node.children() {
949 match child.value() {
950 Node::Text(text) => {
951 let t = text.text.to_string();
952 if !t.is_empty() {
953 spans.push(ParsedSpan {
954 text: t,
955 bold: state.bold,
956 italic: state.italic,
957 underline: state.underline,
958 strikeout: state.strikeout,
959 code: state.code,
960 link_href: state.link_href.clone(),
961 });
962 }
963 }
964 Node::Element(el) => {
965 let tag = el.name();
966 let mut new_state = state.clone();
967
968 match tag {
969 "b" | "strong" => new_state.bold = true,
970 "i" | "em" => new_state.italic = true,
971 "u" | "ins" => new_state.underline = true,
972 "s" | "del" | "strike" => new_state.strikeout = true,
973 "code" => new_state.code = true,
974 "a" => {
975 if let Some(href) = el.attr("href") {
976 new_state.link_href = Some(href.to_string());
977 }
978 }
979 _ => {}
980 }
981
982 let nested_block = matches!(
984 tag,
985 "p" | "div"
986 | "h1"
987 | "h2"
988 | "h3"
989 | "h4"
990 | "h5"
991 | "h6"
992 | "li"
993 | "pre"
994 | "blockquote"
995 | "ul"
996 | "ol"
997 );
998
999 if tag == "br" {
1000 spans.push(ParsedSpan {
1003 text: String::new(),
1004 ..Default::default()
1005 });
1006 } else if nested_block || tag == "table" {
1007 walk_node(
1009 child,
1010 &new_state,
1011 elements,
1012 current_list_style,
1013 blockquote_depth,
1014 list_depth,
1015 depth + 1,
1016 );
1017 } else {
1018 collect_inline_spans(
1020 child,
1021 &new_state,
1022 spans,
1023 current_list_style,
1024 elements,
1025 blockquote_depth,
1026 list_depth,
1027 depth + 1,
1028 );
1029 }
1030 }
1031 _ => {}
1032 }
1033 }
1034 }
1035
1036 let initial_state = FmtState::default();
1037 for child in root.children() {
1038 walk_node(child, &initial_state, &mut elements, &None, 0, 0, 0);
1039 }
1040
1041 if elements.is_empty() {
1043 elements.push(ParsedElement::Block(ParsedBlock {
1044 spans: vec![ParsedSpan {
1045 text: String::new(),
1046 ..Default::default()
1047 }],
1048 heading_level: None,
1049 list_style: None,
1050 list_indent: 0,
1051 is_code_block: false,
1052 code_language: None,
1053 blockquote_depth: 0,
1054 line_height: None,
1055 non_breakable_lines: None,
1056 direction: None,
1057 background_color: None,
1058 }));
1059 }
1060
1061 elements
1062}
1063
// Unit tests for the Markdown parser, the HTML parser and the CSS style helper.
#[cfg(test)]
mod tests {
    use super::*;

    // Parses Markdown and flattens tables, mirroring what `parse_html` does for HTML.
    fn parse_markdown_blocks(md: &str) -> Vec<ParsedBlock> {
        ParsedElement::flatten_to_blocks(parse_markdown(md))
    }

    // A paragraph with plain and bold text yields one block with both spans.
    #[test]
    fn test_parse_markdown_simple_paragraph() {
        let blocks = parse_markdown_blocks("Hello **world**");
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].spans.len() >= 2);
        let plain_span = blocks[0]
            .spans
            .iter()
            .find(|s| s.text.contains("Hello"))
            .unwrap();
        assert!(!plain_span.bold);
        let bold_span = blocks[0].spans.iter().find(|s| s.text == "world").unwrap();
        assert!(bold_span.bold);
    }

    // An ATX heading sets the heading level.
    #[test]
    fn test_parse_markdown_heading() {
        let blocks = parse_markdown_blocks("# Title");
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].heading_level, Some(1));
        assert_eq!(blocks[0].spans[0].text, "Title");
    }

    // Bullet list items carry the disc list style.
    #[test]
    fn test_parse_markdown_list() {
        let blocks = parse_markdown_blocks("- item1\n- item2");
        assert!(blocks.len() >= 2);
        assert_eq!(blocks[0].list_style, Some(ListStyle::Disc));
        assert_eq!(blocks[1].list_style, Some(ListStyle::Disc));
    }

    // `<b>` inside a paragraph marks its text as bold.
    #[test]
    fn test_parse_html_simple() {
        let blocks = parse_html("<p>Hello <b>world</b></p>");
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].spans.len() >= 2);
        let bold_span = blocks[0].spans.iter().find(|s| s.text == "world").unwrap();
        assert!(bold_span.bold);
    }

    // Each `<p>` becomes its own block.
    #[test]
    fn test_parse_html_multiple_paragraphs() {
        let blocks = parse_html("<p>A</p><p>B</p>");
        assert_eq!(blocks.len(), 2);
    }

    // `<h2>` maps to heading level 2.
    #[test]
    fn test_parse_html_heading() {
        let blocks = parse_html("<h2>Subtitle</h2>");
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].heading_level, Some(2));
    }

    // `<ul>` items carry the disc list style.
    #[test]
    fn test_parse_html_list() {
        let blocks = parse_html("<ul><li>one</li><li>two</li></ul>");
        assert!(blocks.len() >= 2);
        assert_eq!(blocks[0].list_style, Some(ListStyle::Disc));
    }

    // A fenced code block is flagged as code and loses its trailing newline.
    #[test]
    fn test_parse_markdown_code_block() {
        let blocks = parse_markdown_blocks("```\nfn main() {}\n```");
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].is_code_block);
        assert!(blocks[0].spans[0].code);
        let text: String = blocks[0].spans.iter().map(|s| s.text.as_str()).collect();
        assert_eq!(
            text, "fn main() {}",
            "code block text should not have trailing newline"
        );
    }

    // `***text***` is both bold and italic.
    #[test]
    fn test_parse_markdown_nested_formatting() {
        let blocks = parse_markdown_blocks("***bold italic***");
        assert_eq!(blocks.len(), 1);
        let span = &blocks[0].spans[0];
        assert!(span.bold);
        assert!(span.italic);
    }

    // Link text keeps the destination URL.
    #[test]
    fn test_parse_markdown_link() {
        let blocks = parse_markdown_blocks("[click](http://example.com)");
        assert_eq!(blocks.len(), 1);
        let span = &blocks[0].spans[0];
        assert_eq!(span.text, "click");
        assert_eq!(span.link_href, Some("http://example.com".to_string()));
    }

    // Empty Markdown still yields one block with one empty span.
    #[test]
    fn test_parse_markdown_empty() {
        let blocks = parse_markdown_blocks("");
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].spans[0].text.is_empty());
    }

    // Empty HTML still yields one block with one empty span.
    #[test]
    fn test_parse_html_empty() {
        let blocks = parse_html("");
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].spans[0].text.is_empty());
    }

    // Nested `<b><i>` accumulates both flags.
    #[test]
    fn test_parse_html_nested_formatting() {
        let blocks = parse_html("<p><b><i>bold italic</i></b></p>");
        assert_eq!(blocks.len(), 1);
        let span = &blocks[0].spans[0];
        assert!(span.bold);
        assert!(span.italic);
    }

    // `<a href>` text keeps the link target.
    #[test]
    fn test_parse_html_link() {
        let blocks = parse_html("<p><a href=\"http://example.com\">click</a></p>");
        assert_eq!(blocks.len(), 1);
        let span = &blocks[0].spans[0];
        assert_eq!(span.text, "click");
        assert_eq!(span.link_href, Some("http://example.com".to_string()));
    }

    // `<ol>` items carry the decimal list style.
    #[test]
    fn test_parse_html_ordered_list() {
        let blocks = parse_html("<ol><li>first</li><li>second</li></ol>");
        assert!(blocks.len() >= 2);
        assert_eq!(blocks[0].list_style, Some(ListStyle::Decimal));
    }

    // Numbered Markdown items carry the decimal list style.
    #[test]
    fn test_parse_markdown_ordered_list() {
        let blocks = parse_markdown_blocks("1. first\n2. second");
        assert!(blocks.len() >= 2);
        assert_eq!(blocks[0].list_style, Some(ListStyle::Decimal));
    }

    // Bare text inside a blockquote between paragraphs is not lost.
    #[test]
    fn test_parse_html_blockquote_nested() {
        let blocks = parse_html("<p>before</p><blockquote>quoted</blockquote><p>after</p>");
        assert!(blocks.len() >= 3);
    }

    // A unitless line-height is stored in thousandths.
    #[test]
    fn test_parse_block_styles_line_height() {
        let styles = parse_block_styles("line-height: 1.5");
        assert_eq!(styles.line_height, Some(1500));
    }

    // `direction: rtl` maps to right-to-left.
    #[test]
    fn test_parse_block_styles_direction_rtl() {
        let styles = parse_block_styles("direction: rtl");
        assert_eq!(styles.direction, Some(TextDirection::RightToLeft));
    }

    // The background colour value is kept verbatim.
    #[test]
    fn test_parse_block_styles_background_color() {
        let styles = parse_block_styles("background-color: #ff0000");
        assert_eq!(styles.background_color, Some("#ff0000".to_string()));
    }

    // `white-space: pre` marks lines as non-breakable.
    #[test]
    fn test_parse_block_styles_white_space_pre() {
        let styles = parse_block_styles("white-space: pre");
        assert_eq!(styles.non_breakable_lines, Some(true));
    }

    // Several declarations in one style string are all picked up.
    #[test]
    fn test_parse_block_styles_multiple() {
        let styles = parse_block_styles("line-height: 2.0; direction: rtl; background-color: blue");
        assert_eq!(styles.line_height, Some(2000));
        assert_eq!(styles.direction, Some(TextDirection::RightToLeft));
        assert_eq!(styles.background_color, Some("blue".to_string()));
    }

    // Inline styles on a `<p>` end up on the resulting block.
    #[test]
    fn test_parse_html_block_styles_extracted() {
        let blocks = parse_html(
            r#"<p style="line-height: 1.5; direction: rtl; background-color: #ccc">text</p>"#,
        );
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].line_height, Some(1500));
        assert_eq!(blocks[0].direction, Some(TextDirection::RightToLeft));
        assert_eq!(blocks[0].background_color, Some("#ccc".to_string()));
    }

    // `white-space: pre` on a `<p>` is reflected on the block.
    #[test]
    fn test_parse_html_white_space_pre() {
        let blocks = parse_html(r#"<p style="white-space: pre">code</p>"#);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].non_breakable_lines, Some(true));
    }

    // Without a style attribute all CSS-derived fields stay unset.
    #[test]
    fn test_parse_html_no_styles_returns_none() {
        let blocks = parse_html("<p>plain</p>");
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].line_height, None);
        assert_eq!(blocks[0].direction, None);
        assert_eq!(blocks[0].background_color, None);
        assert_eq!(blocks[0].non_breakable_lines, None);
    }

    // Nested bullet lists report increasing indent levels.
    // NOTE(review): CommonMark only nests an item when its marker is indented
    // to the parent's content column — confirm the indentation inside the
    // literal below is what the test intends.
    #[test]
    fn test_parse_markdown_nested_list_indent() {
        let md = "- top\n - nested\n - deep";
        let blocks = parse_markdown_blocks(md);
        assert_eq!(blocks.len(), 3);
        assert_eq!(blocks[0].list_style, Some(ListStyle::Disc));
        assert_eq!(blocks[0].list_indent, 0);
        assert_eq!(blocks[1].list_style, Some(ListStyle::Disc));
        assert_eq!(blocks[1].list_indent, 1);
        assert_eq!(blocks[2].list_style, Some(ListStyle::Disc));
        assert_eq!(blocks[2].list_indent, 2);
    }

    // Sibling items of a nested ordered list share the same indent level.
    // NOTE(review): same indentation caveat as for the bullet-list test —
    // confirm the literal's leading spaces.
    #[test]
    fn test_parse_markdown_nested_ordered_list_indent() {
        let md = "1. first\n 1. nested\n 2. nested2";
        let blocks = parse_markdown_blocks(md);
        assert_eq!(blocks.len(), 3);
        assert_eq!(blocks[0].list_indent, 0);
        assert_eq!(blocks[1].list_indent, 1);
        assert_eq!(blocks[2].list_indent, 1);
    }

    // A `<ul>` placed directly inside another `<ul>` raises the indent level.
    #[test]
    fn test_parse_html_nested_list_indent() {
        let html = "<ul><li>top</li><ul><li>nested</li></ul></ul>";
        let blocks = parse_html(html);
        assert!(blocks.len() >= 2);
        assert_eq!(blocks[0].list_indent, 0);
        assert_eq!(blocks[1].list_indent, 1);
    }

    // A pipe table becomes a single table element with header + body rows.
    #[test]
    fn test_parse_markdown_table() {
        let md = "| A | B |\n|---|---|\n| 1 | 2 |";
        let elements = parse_markdown(md);
        assert_eq!(elements.len(), 1);
        match &elements[0] {
            ParsedElement::Table(table) => {
                assert_eq!(table.header_rows, 1);
                // One header row plus one body row.
                assert_eq!(table.rows.len(), 2);
                // Header row.
                assert_eq!(table.rows[0].len(), 2);
                assert_eq!(table.rows[0][0].spans[0].text, "A");
                assert_eq!(table.rows[0][1].spans[0].text, "B");
                // Body row.
                assert_eq!(table.rows[1].len(), 2);
                assert_eq!(table.rows[1][0].spans[0].text, "1");
                assert_eq!(table.rows[1][1].spans[0].text, "2");
            }
            _ => panic!("Expected ParsedElement::Table"),
        }
    }

    // Inline formatting inside table cells is preserved per cell.
    #[test]
    fn test_parse_markdown_table_with_formatting() {
        let md = "| **bold** | `code` | *italic* |\n|---|---|---|\n| ~~strike~~ | plain | [link](http://x.com) |";
        let elements = parse_markdown(md);
        assert_eq!(elements.len(), 1);
        match &elements[0] {
            ParsedElement::Table(table) => {
                assert_eq!(table.rows.len(), 2);
                // Header cells: bold, inline code, italic.
                assert!(table.rows[0][0].spans[0].bold);
                assert!(table.rows[0][1].spans[0].code);
                assert!(table.rows[0][2].spans[0].italic);
                // Body cells: strikethrough and a link.
                assert!(table.rows[1][0].spans[0].strikeout);
                assert_eq!(
                    table.rows[1][2].spans[0].link_href,
                    Some("http://x.com".to_string())
                );
            }
            _ => panic!("Expected ParsedElement::Table"),
        }
    }

    // Paragraphs around a table keep their position relative to it.
    #[test]
    fn test_parse_markdown_mixed_content_with_table() {
        let md = "Before\n\n| A | B |\n|---|---|\n| 1 | 2 |\n\nAfter";
        let elements = parse_markdown(md);
        assert_eq!(elements.len(), 3);
        assert!(matches!(&elements[0], ParsedElement::Block(_)));
        assert!(matches!(&elements[1], ParsedElement::Table(_)));
        assert!(matches!(&elements[2], ParsedElement::Block(_)));
    }
}