Skip to main content

text_document_common/parser_tools/
content_parser.rs

1use crate::entities::{ListStyle, TextDirection};
2
/// A parsed inline span: a run of text together with the inline formatting
/// that applies to the whole run.
///
/// `Default` yields an empty, unformatted span with no link.
#[derive(Debug, Clone, Default)]
pub struct ParsedSpan {
    /// Text content of the span.
    pub text: String,
    /// Bold; set for Markdown strong emphasis and HTML `<b>` / `<strong>`.
    pub bold: bool,
    /// Italic; set for Markdown emphasis and HTML `<i>` / `<em>`.
    pub italic: bool,
    /// Underline; set for HTML `<u>` / `<ins>`. The Markdown parser always
    /// leaves this `false`.
    pub underline: bool,
    /// Strike-through; set for Markdown strikethrough and HTML
    /// `<s>` / `<del>` / `<strike>`.
    pub strikeout: bool,
    /// Code formatting; set for Markdown inline code, for text inside a
    /// Markdown code block, and for text inside an HTML `<code>` element.
    pub code: bool,
    /// Link destination when the span is inside a link (Markdown link URL or
    /// the `href` of an HTML `<a>`); `None` otherwise.
    pub link_href: Option<String>,
}
14
/// A parsed block (paragraph, heading, list item, code block).
///
/// Both the Markdown and the HTML parser in this file produce a flat list of
/// these; nesting is only recorded through `blockquote_depth`.
#[derive(Debug, Clone)]
pub struct ParsedBlock {
    /// Inline content of the block, in document order.
    pub spans: Vec<ParsedSpan>,
    /// Heading level (1–6) when the block is a heading; `None` otherwise.
    pub heading_level: Option<i64>,
    /// List style when the block is a list item. The parsers in this file use
    /// `ListStyle::Disc` for unordered and `ListStyle::Decimal` for ordered
    /// lists.
    pub list_style: Option<ListStyle>,
    /// `true` for a Markdown code block or an HTML `<pre>` element.
    pub is_code_block: bool,
    /// Language of a code block: the info string of a fenced Markdown block,
    /// or the `language-…` class of a `<code>` directly inside `<pre>`.
    pub code_language: Option<String>,
    /// Number of blockquotes enclosing this block; `0` when not quoted.
    pub blockquote_depth: u32,
    /// Line height taken from an inline CSS `line-height`, stored as the
    /// multiplier times 1000. Only the HTML parser sets this.
    pub line_height: Option<i64>,
    /// `Some(true)` when inline CSS `white-space` is `pre`, `nowrap` or
    /// `pre-wrap`. Only the HTML parser sets this.
    pub non_breakable_lines: Option<bool>,
    /// Text direction taken from an inline CSS `direction` of `rtl` / `ltr`.
    /// Only the HTML parser sets this.
    pub direction: Option<TextDirection>,
    /// Raw value of an inline CSS `background-color` / `background`
    /// declaration, stored unparsed. Only the HTML parser sets this.
    pub background_color: Option<String>,
}
29
30impl ParsedBlock {
31    /// Returns `true` when this block carries no block-level formatting,
32    /// meaning its content is purely inline.
33    pub fn is_inline_only(&self) -> bool {
34        self.heading_level.is_none()
35            && self.list_style.is_none()
36            && !self.is_code_block
37            && self.blockquote_depth == 0
38            && self.line_height.is_none()
39            && self.non_breakable_lines.is_none()
40            && self.direction.is_none()
41            && self.background_color.is_none()
42    }
43}
44
45// ─── Markdown parsing ────────────────────────────────────────────────
46
47pub fn parse_markdown(markdown: &str) -> Vec<ParsedBlock> {
48    use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
49
50    let options =
51        Options::ENABLE_STRIKETHROUGH | Options::ENABLE_TABLES | Options::ENABLE_TASKLISTS;
52    let parser = Parser::new_ext(markdown, options);
53
54    let mut blocks: Vec<ParsedBlock> = Vec::new();
55    let mut current_spans: Vec<ParsedSpan> = Vec::new();
56    let mut current_heading: Option<i64> = None;
57    let mut current_list_style: Option<ListStyle> = None;
58    let mut is_code_block = false;
59    let mut code_language: Option<String> = None;
60    let mut blockquote_depth: u32 = 0;
61    let mut in_block = false;
62
63    // Formatting state stack
64    let mut bold = false;
65    let mut italic = false;
66    let mut strikeout = false;
67    let mut link_href: Option<String> = None;
68
69    // List style stack for nested lists
70    let mut list_stack: Vec<Option<ListStyle>> = Vec::new();
71
72    for event in parser {
73        match event {
74            Event::Start(Tag::Paragraph) => {
75                in_block = true;
76                current_heading = None;
77                is_code_block = false;
78            }
79            Event::End(TagEnd::Paragraph) => {
80                if !current_spans.is_empty() || in_block {
81                    blocks.push(ParsedBlock {
82                        spans: std::mem::take(&mut current_spans),
83                        heading_level: current_heading.take(),
84                        list_style: current_list_style.clone(),
85                        is_code_block: false,
86                        code_language: None,
87                        blockquote_depth,
88                        line_height: None,
89                        non_breakable_lines: None,
90                        direction: None,
91                        background_color: None,
92                    });
93                }
94                in_block = false;
95                current_list_style = None;
96            }
97            Event::Start(Tag::Heading { level, .. }) => {
98                in_block = true;
99                current_heading = Some(heading_level_to_i64(level));
100                is_code_block = false;
101            }
102            Event::End(TagEnd::Heading(_)) => {
103                blocks.push(ParsedBlock {
104                    spans: std::mem::take(&mut current_spans),
105                    heading_level: current_heading.take(),
106                    list_style: None,
107                    is_code_block: false,
108                    code_language: None,
109                    blockquote_depth,
110                    line_height: None,
111                    non_breakable_lines: None,
112                    direction: None,
113                    background_color: None,
114                });
115                in_block = false;
116            }
117            Event::Start(Tag::List(ordered)) => {
118                let style = if ordered.is_some() {
119                    Some(ListStyle::Decimal)
120                } else {
121                    Some(ListStyle::Disc)
122                };
123                list_stack.push(style);
124            }
125            Event::End(TagEnd::List(_)) => {
126                list_stack.pop();
127            }
128            Event::Start(Tag::Item) => {
129                in_block = true;
130                current_list_style = list_stack.last().cloned().flatten();
131            }
132            Event::End(TagEnd::Item) => {
133                // The paragraph inside the item will have already been flushed,
134                // but if there was no inner paragraph (tight list), flush now.
135                if !current_spans.is_empty() {
136                    blocks.push(ParsedBlock {
137                        spans: std::mem::take(&mut current_spans),
138                        heading_level: None,
139                        list_style: current_list_style.clone(),
140                        is_code_block: false,
141                        code_language: None,
142                        blockquote_depth,
143                        line_height: None,
144                        non_breakable_lines: None,
145                        direction: None,
146                        background_color: None,
147                    });
148                }
149                in_block = false;
150                current_list_style = None;
151            }
152            Event::Start(Tag::CodeBlock(kind)) => {
153                in_block = true;
154                is_code_block = true;
155                code_language = match &kind {
156                    pulldown_cmark::CodeBlockKind::Fenced(lang) if !lang.is_empty() => {
157                        Some(lang.to_string())
158                    }
159                    _ => None,
160                };
161            }
162            Event::End(TagEnd::CodeBlock) => {
163                // pulldown-cmark appends a trailing '\n' to code block text — strip it
164                if let Some(last) = current_spans.last_mut()
165                    && last.text.ends_with('\n')
166                {
167                    last.text.truncate(last.text.len() - 1);
168                }
169                blocks.push(ParsedBlock {
170                    spans: std::mem::take(&mut current_spans),
171                    heading_level: None,
172                    list_style: None,
173                    is_code_block: true,
174                    code_language: code_language.take(),
175                    blockquote_depth,
176                    line_height: None,
177                    non_breakable_lines: None,
178                    direction: None,
179                    background_color: None,
180                });
181                in_block = false;
182                is_code_block = false;
183            }
184            Event::Start(Tag::Emphasis) => {
185                italic = true;
186            }
187            Event::End(TagEnd::Emphasis) => {
188                italic = false;
189            }
190            Event::Start(Tag::Strong) => {
191                bold = true;
192            }
193            Event::End(TagEnd::Strong) => {
194                bold = false;
195            }
196            Event::Start(Tag::Strikethrough) => {
197                strikeout = true;
198            }
199            Event::End(TagEnd::Strikethrough) => {
200                strikeout = false;
201            }
202            Event::Start(Tag::Link { dest_url, .. }) => {
203                link_href = Some(dest_url.to_string());
204            }
205            Event::End(TagEnd::Link) => {
206                link_href = None;
207            }
208            Event::Text(text) => {
209                if !in_block {
210                    // Bare text outside any block — create an implicit paragraph
211                    in_block = true;
212                }
213                current_spans.push(ParsedSpan {
214                    text: text.to_string(),
215                    bold,
216                    italic,
217                    underline: false,
218                    strikeout,
219                    code: is_code_block,
220                    link_href: link_href.clone(),
221                });
222            }
223            Event::Code(text) => {
224                if !in_block {
225                    in_block = true;
226                }
227                current_spans.push(ParsedSpan {
228                    text: text.to_string(),
229                    bold,
230                    italic,
231                    underline: false,
232                    strikeout,
233                    code: true,
234                    link_href: link_href.clone(),
235                });
236            }
237            Event::SoftBreak => {
238                // Add a space
239                current_spans.push(ParsedSpan {
240                    text: " ".to_string(),
241                    bold,
242                    italic,
243                    underline: false,
244                    strikeout,
245                    code: false,
246                    link_href: link_href.clone(),
247                });
248            }
249            Event::HardBreak => {
250                // Finalize current block
251                if !current_spans.is_empty() || in_block {
252                    blocks.push(ParsedBlock {
253                        spans: std::mem::take(&mut current_spans),
254                        heading_level: current_heading.take(),
255                        list_style: current_list_style.clone(),
256                        is_code_block,
257                        code_language: code_language.clone(),
258                        blockquote_depth,
259                        line_height: None,
260                        non_breakable_lines: None,
261                        direction: None,
262                        background_color: None,
263                    });
264                }
265            }
266            Event::Start(Tag::BlockQuote(_)) => {
267                blockquote_depth += 1;
268            }
269            Event::End(TagEnd::BlockQuote(_)) => {
270                blockquote_depth = blockquote_depth.saturating_sub(1);
271            }
272            _ => {}
273        }
274    }
275
276    // Flush any remaining content
277    if !current_spans.is_empty() {
278        blocks.push(ParsedBlock {
279            spans: std::mem::take(&mut current_spans),
280            heading_level: current_heading,
281            list_style: current_list_style,
282            is_code_block,
283            code_language: code_language.take(),
284            blockquote_depth,
285            line_height: None,
286            non_breakable_lines: None,
287            direction: None,
288            background_color: None,
289        });
290    }
291
292    // If no blocks were parsed, create a single empty paragraph
293    if blocks.is_empty() {
294        blocks.push(ParsedBlock {
295            spans: vec![ParsedSpan {
296                text: String::new(),
297                ..Default::default()
298            }],
299            heading_level: None,
300            list_style: None,
301            is_code_block: false,
302            code_language: None,
303            blockquote_depth: 0,
304            line_height: None,
305            non_breakable_lines: None,
306            direction: None,
307            background_color: None,
308        });
309    }
310
311    blocks
312}
313
314fn heading_level_to_i64(level: pulldown_cmark::HeadingLevel) -> i64 {
315    use pulldown_cmark::HeadingLevel;
316    match level {
317        HeadingLevel::H1 => 1,
318        HeadingLevel::H2 => 2,
319        HeadingLevel::H3 => 3,
320        HeadingLevel::H4 => 4,
321        HeadingLevel::H5 => 5,
322        HeadingLevel::H6 => 6,
323    }
324}
325
326// ─── HTML parsing ────────────────────────────────────────────────────
327
328use scraper::Node;
329
/// Parsed CSS block-level styles from an inline `style` attribute.
///
/// Every field is `None` unless the corresponding declaration was present
/// and recognised; `Default` therefore means "no block styles".
#[derive(Debug, Clone, Default)]
struct BlockStyles {
    /// `line-height` given as a plain number, stored as multiplier × 1000.
    line_height: Option<i64>,
    /// `Some(true)` when `white-space` is `pre`, `nowrap` or `pre-wrap`.
    non_breakable_lines: Option<bool>,
    /// `direction`, for the values `rtl` and `ltr`.
    direction: Option<TextDirection>,
    /// Raw value of `background-color` / `background`, stored unparsed.
    background_color: Option<String>,
}
338
339/// Parse relevant CSS properties from an inline style string.
340/// Handles: line-height, white-space, direction, background-color.
341fn parse_block_styles(style: &str) -> BlockStyles {
342    let mut result = BlockStyles::default();
343    for part in style.split(';') {
344        let part = part.trim();
345        if let Some((prop, val)) = part.split_once(':') {
346            let prop = prop.trim().to_ascii_lowercase();
347            let val = val.trim();
348            match prop.as_str() {
349                "line-height" => {
350                    // Try parsing as a plain number (multiplier)
351                    if let Ok(v) = val.parse::<f64>() {
352                        result.line_height = Some((v * 1000.0) as i64);
353                    }
354                }
355                "white-space" => {
356                    if val == "pre" || val == "nowrap" || val == "pre-wrap" {
357                        result.non_breakable_lines = Some(true);
358                    }
359                }
360                "direction" => {
361                    if val.eq_ignore_ascii_case("rtl") {
362                        result.direction = Some(TextDirection::RightToLeft);
363                    } else if val.eq_ignore_ascii_case("ltr") {
364                        result.direction = Some(TextDirection::LeftToRight);
365                    }
366                }
367                "background-color" | "background" => {
368                    result.background_color = Some(val.to_string());
369                }
370                _ => {}
371            }
372        }
373    }
374    result
375}
376
377pub fn parse_html(html: &str) -> Vec<ParsedBlock> {
378    use scraper::Html;
379
380    let fragment = Html::parse_fragment(html);
381    let mut blocks: Vec<ParsedBlock> = Vec::new();
382
383    // Walk the DOM tree starting from the root
384    let root = fragment.root_element();
385
386    #[derive(Clone, Default)]
387    struct FmtState {
388        bold: bool,
389        italic: bool,
390        underline: bool,
391        strikeout: bool,
392        code: bool,
393        link_href: Option<String>,
394    }
395
396    const MAX_RECURSION_DEPTH: usize = 256;
397
398    fn walk_node(
399        node: ego_tree::NodeRef<Node>,
400        state: &FmtState,
401        blocks: &mut Vec<ParsedBlock>,
402        current_list_style: &Option<ListStyle>,
403        blockquote_depth: u32,
404        depth: usize,
405    ) {
406        if depth > MAX_RECURSION_DEPTH {
407            return;
408        }
409        match node.value() {
410            Node::Element(el) => {
411                let tag = el.name();
412                let mut new_state = state.clone();
413                let mut new_list_style = current_list_style.clone();
414                let mut bq_depth = blockquote_depth;
415
416                // Determine if this is a block-level element
417                let is_block_tag = matches!(
418                    tag,
419                    "p" | "div"
420                        | "h1"
421                        | "h2"
422                        | "h3"
423                        | "h4"
424                        | "h5"
425                        | "h6"
426                        | "li"
427                        | "pre"
428                        | "br"
429                        | "blockquote"
430                );
431
432                // Update formatting state
433                match tag {
434                    "b" | "strong" => new_state.bold = true,
435                    "i" | "em" => new_state.italic = true,
436                    "u" | "ins" => new_state.underline = true,
437                    "s" | "del" | "strike" => new_state.strikeout = true,
438                    "code" => new_state.code = true,
439                    "a" => {
440                        if let Some(href) = el.attr("href") {
441                            new_state.link_href = Some(href.to_string());
442                        }
443                    }
444                    "ul" => {
445                        new_list_style = Some(ListStyle::Disc);
446                    }
447                    "ol" => {
448                        new_list_style = Some(ListStyle::Decimal);
449                    }
450                    "blockquote" => {
451                        bq_depth += 1;
452                    }
453                    _ => {}
454                }
455
456                // Determine heading level
457                let heading_level = match tag {
458                    "h1" => Some(1),
459                    "h2" => Some(2),
460                    "h3" => Some(3),
461                    "h4" => Some(4),
462                    "h5" => Some(5),
463                    "h6" => Some(6),
464                    _ => None,
465                };
466
467                let is_code_block = tag == "pre";
468
469                // Extract code language from <pre><code class="language-xxx">
470                let code_language = if is_code_block {
471                    node.children().find_map(|child| {
472                        if let Node::Element(cel) = child.value()
473                            && cel.name() == "code"
474                            && let Some(cls) = cel.attr("class")
475                        {
476                            return cls
477                                .split_whitespace()
478                                .find_map(|c| c.strip_prefix("language-"))
479                                .map(|l| l.to_string());
480                        }
481                        None
482                    })
483                } else {
484                    None
485                };
486
487                // Extract CSS styles from block-level elements
488                let css = if is_block_tag {
489                    el.attr("style").map(parse_block_styles).unwrap_or_default()
490                } else {
491                    BlockStyles::default()
492                };
493
494                if tag == "br" {
495                    // <br> creates a new block
496                    blocks.push(ParsedBlock {
497                        spans: vec![ParsedSpan {
498                            text: String::new(),
499                            ..Default::default()
500                        }],
501                        heading_level: None,
502                        list_style: None,
503                        is_code_block: false,
504                        code_language: None,
505                        blockquote_depth: bq_depth,
506                        line_height: None,
507                        non_breakable_lines: None,
508                        direction: None,
509                        background_color: None,
510                    });
511                    return;
512                }
513
514                if tag == "blockquote" {
515                    // Blockquote is a container — recurse into children with increased depth
516                    for child in node.children() {
517                        walk_node(
518                            child,
519                            &new_state,
520                            blocks,
521                            &new_list_style,
522                            bq_depth,
523                            depth + 1,
524                        );
525                    }
526                } else if is_block_tag && tag != "br" {
527                    // Start collecting spans for a new block
528                    let mut spans: Vec<ParsedSpan> = Vec::new();
529                    collect_inline_spans(
530                        node,
531                        &new_state,
532                        &mut spans,
533                        &new_list_style,
534                        blocks,
535                        bq_depth,
536                        depth + 1,
537                    );
538
539                    let list_style_for_block = if tag == "li" {
540                        new_list_style.clone()
541                    } else {
542                        None
543                    };
544
545                    if !spans.is_empty() || heading_level.is_some() {
546                        blocks.push(ParsedBlock {
547                            spans,
548                            heading_level,
549                            list_style: list_style_for_block,
550                            is_code_block,
551                            code_language,
552                            blockquote_depth: bq_depth,
553                            line_height: css.line_height,
554                            non_breakable_lines: css.non_breakable_lines,
555                            direction: css.direction,
556                            background_color: css.background_color,
557                        });
558                    }
559                } else if matches!(tag, "ul" | "ol" | "table" | "thead" | "tbody" | "tr") {
560                    // Container elements: recurse into children
561                    for child in node.children() {
562                        walk_node(
563                            child,
564                            &new_state,
565                            blocks,
566                            &new_list_style,
567                            bq_depth,
568                            depth + 1,
569                        );
570                    }
571                } else {
572                    // Inline element or unknown: recurse
573                    for child in node.children() {
574                        walk_node(
575                            child,
576                            &new_state,
577                            blocks,
578                            current_list_style,
579                            bq_depth,
580                            depth + 1,
581                        );
582                    }
583                }
584            }
585            Node::Text(text) => {
586                let t = text.text.to_string();
587                let trimmed = t.trim();
588                if !trimmed.is_empty() {
589                    // Bare text not in a block — create a paragraph
590                    blocks.push(ParsedBlock {
591                        spans: vec![ParsedSpan {
592                            text: trimmed.to_string(),
593                            bold: state.bold,
594                            italic: state.italic,
595                            underline: state.underline,
596                            strikeout: state.strikeout,
597                            code: state.code,
598                            link_href: state.link_href.clone(),
599                        }],
600                        heading_level: None,
601                        list_style: None,
602                        is_code_block: false,
603                        code_language: None,
604                        blockquote_depth,
605                        line_height: None,
606                        non_breakable_lines: None,
607                        direction: None,
608                        background_color: None,
609                    });
610                }
611            }
612            _ => {
613                // Document, Comment, etc. — recurse children
614                for child in node.children() {
615                    walk_node(
616                        child,
617                        state,
618                        blocks,
619                        current_list_style,
620                        blockquote_depth,
621                        depth + 1,
622                    );
623                }
624            }
625        }
626    }
627
628    /// Collect inline spans from a block-level element's children.
629    /// If a nested block-level element is encountered, it is flushed as a
630    /// separate block.
631    fn collect_inline_spans(
632        node: ego_tree::NodeRef<Node>,
633        state: &FmtState,
634        spans: &mut Vec<ParsedSpan>,
635        current_list_style: &Option<ListStyle>,
636        blocks: &mut Vec<ParsedBlock>,
637        blockquote_depth: u32,
638        depth: usize,
639    ) {
640        if depth > MAX_RECURSION_DEPTH {
641            return;
642        }
643        for child in node.children() {
644            match child.value() {
645                Node::Text(text) => {
646                    let t = text.text.to_string();
647                    if !t.is_empty() {
648                        spans.push(ParsedSpan {
649                            text: t,
650                            bold: state.bold,
651                            italic: state.italic,
652                            underline: state.underline,
653                            strikeout: state.strikeout,
654                            code: state.code,
655                            link_href: state.link_href.clone(),
656                        });
657                    }
658                }
659                Node::Element(el) => {
660                    let tag = el.name();
661                    let mut new_state = state.clone();
662
663                    match tag {
664                        "b" | "strong" => new_state.bold = true,
665                        "i" | "em" => new_state.italic = true,
666                        "u" | "ins" => new_state.underline = true,
667                        "s" | "del" | "strike" => new_state.strikeout = true,
668                        "code" => new_state.code = true,
669                        "a" => {
670                            if let Some(href) = el.attr("href") {
671                                new_state.link_href = Some(href.to_string());
672                            }
673                        }
674                        _ => {}
675                    }
676
677                    // Check for nested block elements
678                    let nested_block = matches!(
679                        tag,
680                        "p" | "div"
681                            | "h1"
682                            | "h2"
683                            | "h3"
684                            | "h4"
685                            | "h5"
686                            | "h6"
687                            | "li"
688                            | "pre"
689                            | "blockquote"
690                            | "ul"
691                            | "ol"
692                    );
693
694                    if tag == "br" {
695                        // br within a block: treat as splitting into new block
696                        // For simplicity, just add a newline to current span
697                        spans.push(ParsedSpan {
698                            text: String::new(),
699                            ..Default::default()
700                        });
701                    } else if nested_block {
702                        // Flush as separate block
703                        walk_node(
704                            child,
705                            &new_state,
706                            blocks,
707                            current_list_style,
708                            blockquote_depth,
709                            depth + 1,
710                        );
711                    } else {
712                        // Inline element: recurse
713                        collect_inline_spans(
714                            child,
715                            &new_state,
716                            spans,
717                            current_list_style,
718                            blocks,
719                            blockquote_depth,
720                            depth + 1,
721                        );
722                    }
723                }
724                _ => {}
725            }
726        }
727    }
728
729    let initial_state = FmtState::default();
730    for child in root.children() {
731        walk_node(child, &initial_state, &mut blocks, &None, 0, 0);
732    }
733
734    // If no blocks were parsed, create a single empty paragraph
735    if blocks.is_empty() {
736        blocks.push(ParsedBlock {
737            spans: vec![ParsedSpan {
738                text: String::new(),
739                ..Default::default()
740            }],
741            heading_level: None,
742            list_style: None,
743            is_code_block: false,
744            code_language: None,
745            blockquote_depth: 0,
746            line_height: None,
747            non_breakable_lines: None,
748            direction: None,
749            background_color: None,
750        });
751    }
752
753    blocks
754}
755
756#[cfg(test)]
757mod tests {
758    use super::*;
759
760    #[test]
761    fn test_parse_markdown_simple_paragraph() {
762        let blocks = parse_markdown("Hello **world**");
763        assert_eq!(blocks.len(), 1);
764        assert!(blocks[0].spans.len() >= 2);
765        // "Hello " is plain, "world" is bold
766        let plain_span = blocks[0]
767            .spans
768            .iter()
769            .find(|s| s.text.contains("Hello"))
770            .unwrap();
771        assert!(!plain_span.bold);
772        let bold_span = blocks[0].spans.iter().find(|s| s.text == "world").unwrap();
773        assert!(bold_span.bold);
774    }
775
776    #[test]
777    fn test_parse_markdown_heading() {
778        let blocks = parse_markdown("# Title");
779        assert_eq!(blocks.len(), 1);
780        assert_eq!(blocks[0].heading_level, Some(1));
781        assert_eq!(blocks[0].spans[0].text, "Title");
782    }
783
784    #[test]
785    fn test_parse_markdown_list() {
786        let blocks = parse_markdown("- item1\n- item2");
787        assert!(blocks.len() >= 2);
788        assert_eq!(blocks[0].list_style, Some(ListStyle::Disc));
789        assert_eq!(blocks[1].list_style, Some(ListStyle::Disc));
790    }
791
792    #[test]
793    fn test_parse_html_simple() {
794        let blocks = parse_html("<p>Hello <b>world</b></p>");
795        assert_eq!(blocks.len(), 1);
796        assert!(blocks[0].spans.len() >= 2);
797        let bold_span = blocks[0].spans.iter().find(|s| s.text == "world").unwrap();
798        assert!(bold_span.bold);
799    }
800
801    #[test]
802    fn test_parse_html_multiple_paragraphs() {
803        let blocks = parse_html("<p>A</p><p>B</p>");
804        assert_eq!(blocks.len(), 2);
805    }
806
807    #[test]
808    fn test_parse_html_heading() {
809        let blocks = parse_html("<h2>Subtitle</h2>");
810        assert_eq!(blocks.len(), 1);
811        assert_eq!(blocks[0].heading_level, Some(2));
812    }
813
814    #[test]
815    fn test_parse_html_list() {
816        let blocks = parse_html("<ul><li>one</li><li>two</li></ul>");
817        assert!(blocks.len() >= 2);
818        assert_eq!(blocks[0].list_style, Some(ListStyle::Disc));
819    }
820
/// A fenced code block must become a single code block whose first span is
/// flagged as code and whose concatenated text has no trailing newline.
#[test]
fn test_parse_markdown_code_block() {
    let parsed = parse_markdown("```\nfn main() {}\n```");
    assert_eq!(parsed.len(), 1);

    let code_block = &parsed[0];
    assert!(code_block.is_code_block);
    assert!(code_block.spans[0].code);

    // pulldown-cmark appends a trailing "\n" to code block text; confirm the
    // parser removed it by joining every span and comparing the whole string.
    let mut joined = String::new();
    for span in &code_block.spans {
        joined.push_str(&span.text);
    }
    assert_eq!(
        joined, "fn main() {}",
        "code block text should not have trailing newline"
    );
}
834
/// `***text***` must produce one block whose first span is both bold and
/// italic.
#[test]
fn test_parse_markdown_nested_formatting() {
    let parsed = parse_markdown("***bold italic***");
    assert_eq!(parsed.len(), 1);

    let first_span = &parsed[0].spans[0];
    assert!(first_span.bold);
    assert!(first_span.italic);
}
843
/// An inline Markdown link must produce one block whose first span holds the
/// link text and the target URL in `link_href`.
#[test]
fn test_parse_markdown_link() {
    let parsed = parse_markdown("[click](http://example.com)");
    assert_eq!(parsed.len(), 1);

    let link = &parsed[0].spans[0];
    assert_eq!(link.text, "click");
    assert_eq!(link.link_href.as_deref(), Some("http://example.com"));
}
852
/// Empty Markdown input must still yield one block, and that block's first
/// span must contain empty text.
#[test]
fn test_parse_markdown_empty() {
    let parsed = parse_markdown("");
    assert_eq!(parsed.len(), 1);

    let first_text = &parsed[0].spans[0].text;
    assert!(first_text.is_empty());
}
859
/// Empty HTML input must still yield one block, and that block's first span
/// must contain empty text.
#[test]
fn test_parse_html_empty() {
    let parsed = parse_html("");
    assert_eq!(parsed.len(), 1);

    let first_text = &parsed[0].spans[0].text;
    assert!(first_text.is_empty());
}
866
/// `<b>` wrapping `<i>` must produce one block whose first span is both bold
/// and italic.
#[test]
fn test_parse_html_nested_formatting() {
    let parsed = parse_html("<p><b><i>bold italic</i></b></p>");
    assert_eq!(parsed.len(), 1);

    let first_span = &parsed[0].spans[0];
    assert!(first_span.bold);
    assert!(first_span.italic);
}
875
/// An `<a href>` inside a paragraph must produce one block whose first span
/// holds the anchor text and the `href` value in `link_href`.
#[test]
fn test_parse_html_link() {
    let parsed = parse_html("<p><a href=\"http://example.com\">click</a></p>");
    assert_eq!(parsed.len(), 1);

    let link = &parsed[0].spans[0];
    assert_eq!(link.text, "click");
    assert_eq!(link.link_href.as_deref(), Some("http://example.com"));
}
884
/// A two-item `<ol>` must yield at least two blocks, the first of which is
/// marked with the decimal list style.
#[test]
fn test_parse_html_ordered_list() {
    let parsed = parse_html("<ol><li>first</li><li>second</li></ol>");
    assert!(parsed.len() >= 2);

    let first_item = &parsed[0];
    assert_eq!(first_item.list_style, Some(ListStyle::Decimal));
}
891
/// A two-item numbered Markdown list must yield at least two blocks, the
/// first of which is marked with the decimal list style.
#[test]
fn test_parse_markdown_ordered_list() {
    let parsed = parse_markdown("1. first\n2. second");
    assert!(parsed.len() >= 2);

    let first_item = &parsed[0];
    assert_eq!(first_item.list_style, Some(ListStyle::Decimal));
}
898
/// A `<blockquote>` between two paragraphs must not swallow its neighbours:
/// at least three blocks are expected. Only the block count is checked here.
#[test]
fn test_parse_html_blockquote_nested() {
    let block_count =
        parse_html("<p>before</p><blockquote>quoted</blockquote><p>after</p>").len();
    assert!(block_count >= 3);
}
904
/// `line-height: 1.5` must be reported as `Some(1500)`.
#[test]
fn test_parse_block_styles_line_height() {
    let line_height = parse_block_styles("line-height: 1.5").line_height;
    assert_eq!(line_height, Some(1500));
}
910
/// `direction: rtl` must be reported as a right-to-left text direction.
#[test]
fn test_parse_block_styles_direction_rtl() {
    let parsed_styles = parse_block_styles("direction: rtl");
    let expected = Some(TextDirection::RightToLeft);
    assert_eq!(parsed_styles.direction, expected);
}
916
/// A `background-color` declaration must be returned with its value verbatim.
#[test]
fn test_parse_block_styles_background_color() {
    let parsed_styles = parse_block_styles("background-color: #ff0000");
    assert_eq!(parsed_styles.background_color.as_deref(), Some("#ff0000"));
}
922
/// `white-space: pre` must set `non_breakable_lines` to `Some(true)`.
#[test]
fn test_parse_block_styles_white_space_pre() {
    let non_breakable = parse_block_styles("white-space: pre").non_breakable_lines;
    assert_eq!(non_breakable, Some(true));
}
928
/// Several `;`-separated declarations in one style string must each be
/// picked up: line height, direction and background colour.
#[test]
fn test_parse_block_styles_multiple() {
    let parsed_styles =
        parse_block_styles("line-height: 2.0; direction: rtl; background-color: blue");

    assert_eq!(parsed_styles.line_height, Some(2000));
    assert_eq!(parsed_styles.direction, Some(TextDirection::RightToLeft));
    assert_eq!(parsed_styles.background_color.as_deref(), Some("blue"));
}
936
/// Block-level declarations in a `<p style="…">` attribute must end up on
/// the resulting block: line height, direction and background colour.
#[test]
fn test_parse_html_block_styles_extracted() {
    let parsed = parse_html(
        r#"<p style="line-height: 1.5; direction: rtl; background-color: #ccc">text</p>"#,
    );
    assert_eq!(parsed.len(), 1);

    let styled = &parsed[0];
    assert_eq!(styled.line_height, Some(1500));
    assert_eq!(styled.direction, Some(TextDirection::RightToLeft));
    assert_eq!(styled.background_color.as_deref(), Some("#ccc"));
}
947
/// `white-space: pre` on a `<p>` must mark the resulting block with
/// `non_breakable_lines == Some(true)`.
#[test]
fn test_parse_html_white_space_pre() {
    let parsed = parse_html(r#"<p style="white-space: pre">code</p>"#);
    assert_eq!(parsed.len(), 1);

    let non_breakable = parsed[0].non_breakable_lines;
    assert_eq!(non_breakable, Some(true));
}
954
/// A paragraph without a `style` attribute must leave every block-level
/// style field unset.
#[test]
fn test_parse_html_no_styles_returns_none() {
    let parsed = parse_html("<p>plain</p>");
    assert_eq!(parsed.len(), 1);

    let plain = &parsed[0];
    assert!(plain.line_height.is_none());
    assert!(plain.direction.is_none());
    assert!(plain.background_color.is_none());
    assert!(plain.non_breakable_lines.is_none());
}
964}