Skip to main content

text_document_common/parser_tools/
content_parser.rs

1use crate::entities::{ListStyle, TextDirection};
2
/// A run of inline text together with the character formatting that applies
/// to the whole run.
///
/// `Default` yields an empty, unformatted span. Equality compares the text
/// and every formatting flag, which makes spans directly usable in
/// `assert_eq!`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ParsedSpan {
    /// The literal text of the run.
    pub text: String,
    /// Strong emphasis (`**x**`, `<b>`, `<strong>`).
    pub bold: bool,
    /// Emphasis (`*x*`, `<i>`, `<em>`).
    pub italic: bool,
    /// Underline (`<u>`, `<ins>`); the Markdown parser in this file never sets it.
    pub underline: bool,
    /// Strikethrough (`~~x~~`, `<s>`, `<del>`, `<strike>`).
    pub strikeout: bool,
    /// Inline code, or text that sits inside a code block.
    pub code: bool,
    /// Link target when the run is part of a hyperlink.
    pub link_href: Option<String>,
}
14
15/// A parsed block (paragraph, heading, list item, code block)
16#[derive(Debug, Clone)]
17pub struct ParsedBlock {
18    pub spans: Vec<ParsedSpan>,
19    pub heading_level: Option<i64>,
20    pub list_style: Option<ListStyle>,
21    pub is_code_block: bool,
22    pub line_height: Option<i64>,
23    pub non_breakable_lines: Option<bool>,
24    pub direction: Option<TextDirection>,
25    pub background_color: Option<String>,
26}
27
28impl ParsedBlock {
29    /// Returns `true` when this block carries no block-level formatting,
30    /// meaning its content is purely inline.
31    pub fn is_inline_only(&self) -> bool {
32        self.heading_level.is_none()
33            && self.list_style.is_none()
34            && !self.is_code_block
35            && self.line_height.is_none()
36            && self.non_breakable_lines.is_none()
37            && self.direction.is_none()
38            && self.background_color.is_none()
39    }
40}
41
42// ─── Markdown parsing ────────────────────────────────────────────────
43
44pub fn parse_markdown(markdown: &str) -> Vec<ParsedBlock> {
45    use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
46
47    let options =
48        Options::ENABLE_STRIKETHROUGH | Options::ENABLE_TABLES | Options::ENABLE_TASKLISTS;
49    let parser = Parser::new_ext(markdown, options);
50
51    let mut blocks: Vec<ParsedBlock> = Vec::new();
52    let mut current_spans: Vec<ParsedSpan> = Vec::new();
53    let mut current_heading: Option<i64> = None;
54    let mut current_list_style: Option<ListStyle> = None;
55    let mut is_code_block = false;
56    let mut in_block = false;
57
58    // Formatting state stack
59    let mut bold = false;
60    let mut italic = false;
61    let mut strikeout = false;
62    let mut link_href: Option<String> = None;
63
64    // List style stack for nested lists
65    let mut list_stack: Vec<Option<ListStyle>> = Vec::new();
66
67    for event in parser {
68        match event {
69            Event::Start(Tag::Paragraph) => {
70                in_block = true;
71                current_heading = None;
72                is_code_block = false;
73            }
74            Event::End(TagEnd::Paragraph) => {
75                if !current_spans.is_empty() || in_block {
76                    blocks.push(ParsedBlock {
77                        spans: std::mem::take(&mut current_spans),
78                        heading_level: current_heading.take(),
79                        list_style: current_list_style.clone(),
80                        is_code_block: false,
81                        line_height: None,
82                        non_breakable_lines: None,
83                        direction: None,
84                        background_color: None,
85                    });
86                }
87                in_block = false;
88                current_list_style = None;
89            }
90            Event::Start(Tag::Heading { level, .. }) => {
91                in_block = true;
92                current_heading = Some(heading_level_to_i64(level));
93                is_code_block = false;
94            }
95            Event::End(TagEnd::Heading(_)) => {
96                blocks.push(ParsedBlock {
97                    spans: std::mem::take(&mut current_spans),
98                    heading_level: current_heading.take(),
99                    list_style: None,
100                    is_code_block: false,
101                    line_height: None,
102                    non_breakable_lines: None,
103                    direction: None,
104                    background_color: None,
105                });
106                in_block = false;
107            }
108            Event::Start(Tag::List(ordered)) => {
109                let style = if ordered.is_some() {
110                    Some(ListStyle::Decimal)
111                } else {
112                    Some(ListStyle::Disc)
113                };
114                list_stack.push(style);
115            }
116            Event::End(TagEnd::List(_)) => {
117                list_stack.pop();
118            }
119            Event::Start(Tag::Item) => {
120                in_block = true;
121                current_list_style = list_stack.last().cloned().flatten();
122            }
123            Event::End(TagEnd::Item) => {
124                // The paragraph inside the item will have already been flushed,
125                // but if there was no inner paragraph (tight list), flush now.
126                if !current_spans.is_empty() {
127                    blocks.push(ParsedBlock {
128                        spans: std::mem::take(&mut current_spans),
129                        heading_level: None,
130                        list_style: current_list_style.clone(),
131                        is_code_block: false,
132                        line_height: None,
133                        non_breakable_lines: None,
134                        direction: None,
135                        background_color: None,
136                    });
137                }
138                in_block = false;
139                current_list_style = None;
140            }
141            Event::Start(Tag::CodeBlock(_)) => {
142                in_block = true;
143                is_code_block = true;
144            }
145            Event::End(TagEnd::CodeBlock) => {
146                blocks.push(ParsedBlock {
147                    spans: std::mem::take(&mut current_spans),
148                    heading_level: None,
149                    list_style: None,
150                    is_code_block: true,
151                    line_height: None,
152                    non_breakable_lines: None,
153                    direction: None,
154                    background_color: None,
155                });
156                in_block = false;
157                is_code_block = false;
158            }
159            Event::Start(Tag::Emphasis) => {
160                italic = true;
161            }
162            Event::End(TagEnd::Emphasis) => {
163                italic = false;
164            }
165            Event::Start(Tag::Strong) => {
166                bold = true;
167            }
168            Event::End(TagEnd::Strong) => {
169                bold = false;
170            }
171            Event::Start(Tag::Strikethrough) => {
172                strikeout = true;
173            }
174            Event::End(TagEnd::Strikethrough) => {
175                strikeout = false;
176            }
177            Event::Start(Tag::Link { dest_url, .. }) => {
178                link_href = Some(dest_url.to_string());
179            }
180            Event::End(TagEnd::Link) => {
181                link_href = None;
182            }
183            Event::Text(text) => {
184                if !in_block {
185                    // Bare text outside any block — create an implicit paragraph
186                    in_block = true;
187                }
188                current_spans.push(ParsedSpan {
189                    text: text.to_string(),
190                    bold,
191                    italic,
192                    underline: false,
193                    strikeout,
194                    code: is_code_block,
195                    link_href: link_href.clone(),
196                });
197            }
198            Event::Code(text) => {
199                if !in_block {
200                    in_block = true;
201                }
202                current_spans.push(ParsedSpan {
203                    text: text.to_string(),
204                    bold,
205                    italic,
206                    underline: false,
207                    strikeout,
208                    code: true,
209                    link_href: link_href.clone(),
210                });
211            }
212            Event::SoftBreak => {
213                // Add a space
214                current_spans.push(ParsedSpan {
215                    text: " ".to_string(),
216                    bold,
217                    italic,
218                    underline: false,
219                    strikeout,
220                    code: false,
221                    link_href: link_href.clone(),
222                });
223            }
224            Event::HardBreak => {
225                // Finalize current block
226                if !current_spans.is_empty() || in_block {
227                    blocks.push(ParsedBlock {
228                        spans: std::mem::take(&mut current_spans),
229                        heading_level: current_heading.take(),
230                        list_style: current_list_style.clone(),
231                        is_code_block,
232                        line_height: None,
233                        non_breakable_lines: None,
234                        direction: None,
235                        background_color: None,
236                    });
237                }
238            }
239            _ => {}
240        }
241    }
242
243    // Flush any remaining content
244    if !current_spans.is_empty() {
245        blocks.push(ParsedBlock {
246            spans: std::mem::take(&mut current_spans),
247            heading_level: current_heading,
248            list_style: current_list_style,
249            is_code_block,
250            line_height: None,
251            non_breakable_lines: None,
252            direction: None,
253            background_color: None,
254        });
255    }
256
257    // If no blocks were parsed, create a single empty paragraph
258    if blocks.is_empty() {
259        blocks.push(ParsedBlock {
260            spans: vec![ParsedSpan {
261                text: String::new(),
262                ..Default::default()
263            }],
264            heading_level: None,
265            list_style: None,
266            is_code_block: false,
267            line_height: None,
268            non_breakable_lines: None,
269            direction: None,
270            background_color: None,
271        });
272    }
273
274    blocks
275}
276
277fn heading_level_to_i64(level: pulldown_cmark::HeadingLevel) -> i64 {
278    use pulldown_cmark::HeadingLevel;
279    match level {
280        HeadingLevel::H1 => 1,
281        HeadingLevel::H2 => 2,
282        HeadingLevel::H3 => 3,
283        HeadingLevel::H4 => 4,
284        HeadingLevel::H5 => 5,
285        HeadingLevel::H6 => 6,
286    }
287}
288
289// ─── HTML parsing ────────────────────────────────────────────────────
290
291use scraper::Node;
292
/// Parsed CSS block-level styles from an inline `style` attribute.
///
/// Every field is `None` unless the corresponding declaration was present
/// and understood by `parse_block_styles`.
#[derive(Debug, Clone, Default)]
struct BlockStyles {
    /// `line-height` multiplier scaled by 1000 (e.g. `1.5` is stored as `1500`).
    line_height: Option<i64>,
    /// `Some(true)` when `white-space` is `pre`, `nowrap` or `pre-wrap`.
    non_breakable_lines: Option<bool>,
    /// Value of `direction` when it is `rtl` or `ltr`.
    direction: Option<TextDirection>,
    /// Raw value of `background-color` or `background`.
    background_color: Option<String>,
}
301
302/// Parse relevant CSS properties from an inline style string.
303/// Handles: line-height, white-space, direction, background-color.
304fn parse_block_styles(style: &str) -> BlockStyles {
305    let mut result = BlockStyles::default();
306    for part in style.split(';') {
307        let part = part.trim();
308        if let Some((prop, val)) = part.split_once(':') {
309            let prop = prop.trim().to_ascii_lowercase();
310            let val = val.trim();
311            match prop.as_str() {
312                "line-height" => {
313                    // Try parsing as a plain number (multiplier)
314                    if let Ok(v) = val.parse::<f64>() {
315                        result.line_height = Some((v * 1000.0) as i64);
316                    }
317                }
318                "white-space" => {
319                    if val == "pre" || val == "nowrap" || val == "pre-wrap" {
320                        result.non_breakable_lines = Some(true);
321                    }
322                }
323                "direction" => {
324                    if val.eq_ignore_ascii_case("rtl") {
325                        result.direction = Some(TextDirection::RightToLeft);
326                    } else if val.eq_ignore_ascii_case("ltr") {
327                        result.direction = Some(TextDirection::LeftToRight);
328                    }
329                }
330                "background-color" | "background" => {
331                    result.background_color = Some(val.to_string());
332                }
333                _ => {}
334            }
335        }
336    }
337    result
338}
339
340pub fn parse_html(html: &str) -> Vec<ParsedBlock> {
341    use scraper::Html;
342
343    let fragment = Html::parse_fragment(html);
344    let mut blocks: Vec<ParsedBlock> = Vec::new();
345
346    // Walk the DOM tree starting from the root
347    let root = fragment.root_element();
348
349    #[derive(Clone, Default)]
350    struct FmtState {
351        bold: bool,
352        italic: bool,
353        underline: bool,
354        strikeout: bool,
355        code: bool,
356        link_href: Option<String>,
357    }
358
359    const MAX_RECURSION_DEPTH: usize = 256;
360
361    fn walk_node(
362        node: ego_tree::NodeRef<Node>,
363        state: &FmtState,
364        blocks: &mut Vec<ParsedBlock>,
365        current_list_style: &Option<ListStyle>,
366        depth: usize,
367    ) {
368        if depth > MAX_RECURSION_DEPTH {
369            return;
370        }
371        match node.value() {
372            Node::Element(el) => {
373                let tag = el.name();
374                let mut new_state = state.clone();
375                let mut new_list_style = current_list_style.clone();
376
377                // Determine if this is a block-level element
378                let is_block_tag = matches!(
379                    tag,
380                    "p" | "div"
381                        | "h1"
382                        | "h2"
383                        | "h3"
384                        | "h4"
385                        | "h5"
386                        | "h6"
387                        | "li"
388                        | "pre"
389                        | "br"
390                        | "blockquote"
391                );
392
393                // Update formatting state
394                match tag {
395                    "b" | "strong" => new_state.bold = true,
396                    "i" | "em" => new_state.italic = true,
397                    "u" | "ins" => new_state.underline = true,
398                    "s" | "del" | "strike" => new_state.strikeout = true,
399                    "code" => new_state.code = true,
400                    "a" => {
401                        if let Some(href) = el.attr("href") {
402                            new_state.link_href = Some(href.to_string());
403                        }
404                    }
405                    "ul" => {
406                        new_list_style = Some(ListStyle::Disc);
407                    }
408                    "ol" => {
409                        new_list_style = Some(ListStyle::Decimal);
410                    }
411                    _ => {}
412                }
413
414                // Determine heading level
415                let heading_level = match tag {
416                    "h1" => Some(1),
417                    "h2" => Some(2),
418                    "h3" => Some(3),
419                    "h4" => Some(4),
420                    "h5" => Some(5),
421                    "h6" => Some(6),
422                    _ => None,
423                };
424
425                let is_code_block = tag == "pre";
426
427                // Extract CSS styles from block-level elements
428                let css = if is_block_tag {
429                    el.attr("style").map(parse_block_styles).unwrap_or_default()
430                } else {
431                    BlockStyles::default()
432                };
433
434                if tag == "br" {
435                    // <br> creates a new block
436                    blocks.push(ParsedBlock {
437                        spans: vec![ParsedSpan {
438                            text: String::new(),
439                            ..Default::default()
440                        }],
441                        heading_level: None,
442                        list_style: None,
443                        is_code_block: false,
444                        line_height: None,
445                        non_breakable_lines: None,
446                        direction: None,
447                        background_color: None,
448                    });
449                    return;
450                }
451
452                if is_block_tag && tag != "br" {
453                    // Start collecting spans for a new block
454                    let mut spans: Vec<ParsedSpan> = Vec::new();
455                    collect_inline_spans(
456                        node,
457                        &new_state,
458                        &mut spans,
459                        &new_list_style,
460                        blocks,
461                        depth + 1,
462                    );
463
464                    let list_style_for_block = if tag == "li" {
465                        new_list_style.clone()
466                    } else {
467                        None
468                    };
469
470                    if !spans.is_empty() || heading_level.is_some() {
471                        blocks.push(ParsedBlock {
472                            spans,
473                            heading_level,
474                            list_style: list_style_for_block,
475                            is_code_block,
476                            line_height: css.line_height,
477                            non_breakable_lines: css.non_breakable_lines,
478                            direction: css.direction,
479                            background_color: css.background_color,
480                        });
481                    }
482                } else if matches!(tag, "ul" | "ol" | "table" | "thead" | "tbody" | "tr") {
483                    // Container elements: recurse into children
484                    for child in node.children() {
485                        walk_node(child, &new_state, blocks, &new_list_style, depth + 1);
486                    }
487                } else {
488                    // Inline element or unknown: recurse
489                    for child in node.children() {
490                        walk_node(child, &new_state, blocks, current_list_style, depth + 1);
491                    }
492                }
493            }
494            Node::Text(text) => {
495                let t = text.text.to_string();
496                let trimmed = t.trim();
497                if !trimmed.is_empty() {
498                    // Bare text not in a block — create a paragraph
499                    blocks.push(ParsedBlock {
500                        spans: vec![ParsedSpan {
501                            text: trimmed.to_string(),
502                            bold: state.bold,
503                            italic: state.italic,
504                            underline: state.underline,
505                            strikeout: state.strikeout,
506                            code: state.code,
507                            link_href: state.link_href.clone(),
508                        }],
509                        heading_level: None,
510                        list_style: None,
511                        is_code_block: false,
512                        line_height: None,
513                        non_breakable_lines: None,
514                        direction: None,
515                        background_color: None,
516                    });
517                }
518            }
519            _ => {
520                // Document, Comment, etc. — recurse children
521                for child in node.children() {
522                    walk_node(child, state, blocks, current_list_style, depth + 1);
523                }
524            }
525        }
526    }
527
528    /// Collect inline spans from a block-level element's children.
529    /// If a nested block-level element is encountered, it is flushed as a
530    /// separate block.
531    fn collect_inline_spans(
532        node: ego_tree::NodeRef<Node>,
533        state: &FmtState,
534        spans: &mut Vec<ParsedSpan>,
535        current_list_style: &Option<ListStyle>,
536        blocks: &mut Vec<ParsedBlock>,
537        depth: usize,
538    ) {
539        if depth > MAX_RECURSION_DEPTH {
540            return;
541        }
542        for child in node.children() {
543            match child.value() {
544                Node::Text(text) => {
545                    let t = text.text.to_string();
546                    if !t.is_empty() {
547                        spans.push(ParsedSpan {
548                            text: t,
549                            bold: state.bold,
550                            italic: state.italic,
551                            underline: state.underline,
552                            strikeout: state.strikeout,
553                            code: state.code,
554                            link_href: state.link_href.clone(),
555                        });
556                    }
557                }
558                Node::Element(el) => {
559                    let tag = el.name();
560                    let mut new_state = state.clone();
561
562                    match tag {
563                        "b" | "strong" => new_state.bold = true,
564                        "i" | "em" => new_state.italic = true,
565                        "u" | "ins" => new_state.underline = true,
566                        "s" | "del" | "strike" => new_state.strikeout = true,
567                        "code" => new_state.code = true,
568                        "a" => {
569                            if let Some(href) = el.attr("href") {
570                                new_state.link_href = Some(href.to_string());
571                            }
572                        }
573                        _ => {}
574                    }
575
576                    // Check for nested block elements
577                    let nested_block = matches!(
578                        tag,
579                        "p" | "div"
580                            | "h1"
581                            | "h2"
582                            | "h3"
583                            | "h4"
584                            | "h5"
585                            | "h6"
586                            | "li"
587                            | "pre"
588                            | "blockquote"
589                            | "ul"
590                            | "ol"
591                    );
592
593                    if tag == "br" {
594                        // br within a block: treat as splitting into new block
595                        // For simplicity, just add a newline to current span
596                        spans.push(ParsedSpan {
597                            text: String::new(),
598                            ..Default::default()
599                        });
600                    } else if nested_block {
601                        // Flush as separate block
602                        walk_node(child, &new_state, blocks, current_list_style, depth + 1);
603                    } else {
604                        // Inline element: recurse
605                        collect_inline_spans(
606                            child,
607                            &new_state,
608                            spans,
609                            current_list_style,
610                            blocks,
611                            depth + 1,
612                        );
613                    }
614                }
615                _ => {}
616            }
617        }
618    }
619
620    let initial_state = FmtState::default();
621    for child in root.children() {
622        walk_node(child, &initial_state, &mut blocks, &None, 0);
623    }
624
625    // If no blocks were parsed, create a single empty paragraph
626    if blocks.is_empty() {
627        blocks.push(ParsedBlock {
628            spans: vec![ParsedSpan {
629                text: String::new(),
630                ..Default::default()
631            }],
632            heading_level: None,
633            list_style: None,
634            is_code_block: false,
635            line_height: None,
636            non_breakable_lines: None,
637            direction: None,
638            background_color: None,
639        });
640    }
641
642    blocks
643}
644
/// Unit tests for `parse_markdown`, `parse_html` and `parse_block_styles`.
#[cfg(test)]
mod tests {
    use super::*;

    // ── Markdown / HTML structure ────────────────────────────────────

    /// A paragraph with one bold word yields one block with a plain and a bold span.
    #[test]
    fn test_parse_markdown_simple_paragraph() {
        let blocks = parse_markdown("Hello **world**");
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].spans.len() >= 2);
        // "Hello " is plain, "world" is bold
        let plain_span = blocks[0]
            .spans
            .iter()
            .find(|s| s.text.contains("Hello"))
            .unwrap();
        assert!(!plain_span.bold);
        let bold_span = blocks[0].spans.iter().find(|s| s.text == "world").unwrap();
        assert!(bold_span.bold);
    }

    /// An ATX heading becomes a single block carrying its level.
    #[test]
    fn test_parse_markdown_heading() {
        let blocks = parse_markdown("# Title");
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].heading_level, Some(1));
        assert_eq!(blocks[0].spans[0].text, "Title");
    }

    /// Each bullet item becomes a block with the `Disc` list style.
    #[test]
    fn test_parse_markdown_list() {
        let blocks = parse_markdown("- item1\n- item2");
        assert!(blocks.len() >= 2);
        assert_eq!(blocks[0].list_style, Some(ListStyle::Disc));
        assert_eq!(blocks[1].list_style, Some(ListStyle::Disc));
    }

    /// A `<p>` with a `<b>` child yields one block containing a bold span.
    #[test]
    fn test_parse_html_simple() {
        let blocks = parse_html("<p>Hello <b>world</b></p>");
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].spans.len() >= 2);
        let bold_span = blocks[0].spans.iter().find(|s| s.text == "world").unwrap();
        assert!(bold_span.bold);
    }

    /// Sibling paragraphs become separate blocks.
    #[test]
    fn test_parse_html_multiple_paragraphs() {
        let blocks = parse_html("<p>A</p><p>B</p>");
        assert_eq!(blocks.len(), 2);
    }

    /// `<h2>` maps to heading level 2.
    #[test]
    fn test_parse_html_heading() {
        let blocks = parse_html("<h2>Subtitle</h2>");
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].heading_level, Some(2));
    }

    /// `<li>` inside `<ul>` gets the `Disc` list style.
    #[test]
    fn test_parse_html_list() {
        let blocks = parse_html("<ul><li>one</li><li>two</li></ul>");
        assert!(blocks.len() >= 2);
        assert_eq!(blocks[0].list_style, Some(ListStyle::Disc));
    }

    /// A fenced code block is flagged on both the block and its spans.
    #[test]
    fn test_parse_markdown_code_block() {
        let blocks = parse_markdown("```\nfn main() {}\n```");
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].is_code_block);
        assert!(blocks[0].spans[0].code);
    }

    /// `***x***` sets bold and italic on the same span.
    #[test]
    fn test_parse_markdown_nested_formatting() {
        let blocks = parse_markdown("***bold italic***");
        assert_eq!(blocks.len(), 1);
        let span = &blocks[0].spans[0];
        assert!(span.bold);
        assert!(span.italic);
    }

    /// The link text span carries the destination URL.
    #[test]
    fn test_parse_markdown_link() {
        let blocks = parse_markdown("[click](http://example.com)");
        assert_eq!(blocks.len(), 1);
        let span = &blocks[0].spans[0];
        assert_eq!(span.text, "click");
        assert_eq!(span.link_href, Some("http://example.com".to_string()));
    }

    /// Empty Markdown still yields one block with one empty span.
    #[test]
    fn test_parse_markdown_empty() {
        let blocks = parse_markdown("");
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].spans[0].text.is_empty());
    }

    /// Empty HTML still yields one block with one empty span.
    #[test]
    fn test_parse_html_empty() {
        let blocks = parse_html("");
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].spans[0].text.is_empty());
    }

    /// Formatting from nested inline elements accumulates on the span.
    #[test]
    fn test_parse_html_nested_formatting() {
        let blocks = parse_html("<p><b><i>bold italic</i></b></p>");
        assert_eq!(blocks.len(), 1);
        let span = &blocks[0].spans[0];
        assert!(span.bold);
        assert!(span.italic);
    }

    /// `<a href>` text carries the link target.
    #[test]
    fn test_parse_html_link() {
        let blocks = parse_html("<p><a href=\"http://example.com\">click</a></p>");
        assert_eq!(blocks.len(), 1);
        let span = &blocks[0].spans[0];
        assert_eq!(span.text, "click");
        assert_eq!(span.link_href, Some("http://example.com".to_string()));
    }

    /// `<li>` inside `<ol>` gets the `Decimal` list style.
    #[test]
    fn test_parse_html_ordered_list() {
        let blocks = parse_html("<ol><li>first</li><li>second</li></ol>");
        assert!(blocks.len() >= 2);
        assert_eq!(blocks[0].list_style, Some(ListStyle::Decimal));
    }

    /// Numbered Markdown items get the `Decimal` list style.
    #[test]
    fn test_parse_markdown_ordered_list() {
        let blocks = parse_markdown("1. first\n2. second");
        assert!(blocks.len() >= 2);
        assert_eq!(blocks[0].list_style, Some(ListStyle::Decimal));
    }

    /// A `<blockquote>` between paragraphs produces a block of its own.
    #[test]
    fn test_parse_html_blockquote_nested() {
        let blocks = parse_html("<p>before</p><blockquote>quoted</blockquote><p>after</p>");
        assert!(blocks.len() >= 3);
    }

    // ── Inline CSS parsing ───────────────────────────────────────────

    /// A unitless `line-height` is stored in thousandths.
    #[test]
    fn test_parse_block_styles_line_height() {
        let styles = parse_block_styles("line-height: 1.5");
        assert_eq!(styles.line_height, Some(1500));
    }

    /// `direction: rtl` maps to `RightToLeft`.
    #[test]
    fn test_parse_block_styles_direction_rtl() {
        let styles = parse_block_styles("direction: rtl");
        assert_eq!(styles.direction, Some(TextDirection::RightToLeft));
    }

    /// The background colour value is stored verbatim.
    #[test]
    fn test_parse_block_styles_background_color() {
        let styles = parse_block_styles("background-color: #ff0000");
        assert_eq!(styles.background_color, Some("#ff0000".to_string()));
    }

    /// `white-space: pre` marks the lines as non-breakable.
    #[test]
    fn test_parse_block_styles_white_space_pre() {
        let styles = parse_block_styles("white-space: pre");
        assert_eq!(styles.non_breakable_lines, Some(true));
    }

    /// Several declarations separated by `;` are all picked up.
    #[test]
    fn test_parse_block_styles_multiple() {
        let styles = parse_block_styles("line-height: 2.0; direction: rtl; background-color: blue");
        assert_eq!(styles.line_height, Some(2000));
        assert_eq!(styles.direction, Some(TextDirection::RightToLeft));
        assert_eq!(styles.background_color, Some("blue".to_string()));
    }

    // ── CSS propagated through parse_html ────────────────────────────

    /// Styles on a `<p>` end up on the resulting block.
    #[test]
    fn test_parse_html_block_styles_extracted() {
        let blocks = parse_html(
            r#"<p style="line-height: 1.5; direction: rtl; background-color: #ccc">text</p>"#,
        );
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].line_height, Some(1500));
        assert_eq!(blocks[0].direction, Some(TextDirection::RightToLeft));
        assert_eq!(blocks[0].background_color, Some("#ccc".to_string()));
    }

    /// `white-space: pre` on a `<p>` sets `non_breakable_lines`.
    #[test]
    fn test_parse_html_white_space_pre() {
        let blocks = parse_html(r#"<p style="white-space: pre">code</p>"#);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].non_breakable_lines, Some(true));
    }

    /// Without a `style` attribute every style field stays `None`.
    #[test]
    fn test_parse_html_no_styles_returns_none() {
        let blocks = parse_html("<p>plain</p>");
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].line_height, None);
        assert_eq!(blocks[0].direction, None);
        assert_eq!(blocks[0].background_color, None);
        assert_eq!(blocks[0].non_breakable_lines, None);
    }
}