browser_use/tools/
html_to_markdown.rs

//! HTML to Markdown conversion utilities.
//!
//! This module provides functionality to convert HTML content to clean Markdown format.
4use html2md;
5
6/// Convert HTML content to Markdown format
7///
8/// This function uses the html2md library to convert HTML to Markdown.
9/// It handles common HTML elements like headings, lists, tables, code blocks, etc.
10///
11/// # Arguments
12///
13/// * `html` - The HTML content as a string
14///
15/// # Returns
16///
17/// A String containing the Markdown representation of the HTML
18pub fn convert_html_to_markdown(html: &str) -> String {
19    if html.is_empty() {
20        return String::new();
21    }
22
23    // Use html2md to parse and convert
24    html2md::parse_html(html)
25}
26
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input yields empty output.
    #[test]
    fn test_empty_html() {
        let rendered = convert_html_to_markdown("");
        assert!(rendered.is_empty());
    }

    /// Heading text survives conversion.
    #[test]
    fn test_simple_heading() {
        let rendered = convert_html_to_markdown("<h1>Test Title</h1>");
        // Only the text is checked; the exact heading syntax emitted by
        // html2md is deliberately left unconstrained.
        let has_title = rendered.contains("Test Title");
        assert!(has_title, "Markdown should contain the title text");
    }

    /// Paragraph text survives conversion.
    #[test]
    fn test_paragraph() {
        let rendered = convert_html_to_markdown("<p>This is a paragraph.</p>");
        assert!(rendered.contains("This is a paragraph"));
    }

    /// An anchor keeps both its bracketed label and its target URL.
    #[test]
    fn test_link() {
        let source = r#"<a href="https://example.com">Example</a>"#;
        let rendered = convert_html_to_markdown(source);
        for expected in ["[Example]", "https://example.com"] {
            assert!(rendered.contains(expected));
        }
    }

    /// Every item of an unordered list appears in the output.
    #[test]
    fn test_list() {
        let rendered = convert_html_to_markdown("<ul><li>Item 1</li><li>Item 2</li></ul>");
        for expected in ["Item 1", "Item 2"] {
            assert!(rendered.contains(expected));
        }
    }

    /// The contents of a preformatted code block appear in the output.
    #[test]
    fn test_code_block() {
        let rendered = convert_html_to_markdown("<pre><code>let x = 1;</code></pre>");
        assert!(rendered.contains("let x = 1"));
    }

    /// Both header and data cell text of a table appear in the output.
    #[test]
    fn test_table() {
        let source = "<table><tr><th>Header</th></tr><tr><td>Data</td></tr></table>";
        let rendered = convert_html_to_markdown(source);
        for expected in ["Header", "Data"] {
            assert!(rendered.contains(expected));
        }
    }

    /// A nested document keeps its heading, paragraph and list text.
    #[test]
    fn test_complex_html() {
        let source = r#"
            <article>
                <h1>Main Title</h1>
                <p>First paragraph with <strong>bold</strong> and <em>italic</em>.</p>
                <ul>
                    <li>List item 1</li>
                    <li>List item 2</li>
                </ul>
            </article>
        "#;
        let rendered = convert_html_to_markdown(source);
        for expected in ["Main Title", "First paragraph", "List item 1"] {
            assert!(rendered.contains(expected));
        }
    }
}