skymark 0.1.1

HTML-to-Markdown converter prioritizing proper conversion for human readability
Documentation
#![allow(missing_docs, reason = "allow in tests")]

use skymark::HtmlToMarkdown;

/// Test helper: converts `html` to Markdown by calling `translate` on a freshly
/// constructed [`HtmlToMarkdown`] and returns the resulting string.
fn translate(html: &str) -> String {
    HtmlToMarkdown::new().translate(html)
}

/// A table holding one cell renders as a single header row plus a separator row,
/// whether the cell is a `<th>`, a `<td>`, or a `<td>` with no enclosing `<tr>`.
/// Whitespace surrounding the cell text is not kept in the output.
#[test]
fn single_row_single_column_table() {
    let expected = "| col1 |\n| ---- |";
    let inputs = [
        "<table><tr><th>  col1 </th></tr></table>",
        "<table><tr><td>  col1 </td></tr></table>",
        "<table><td>  col1 </td></table>",
    ];

    for html in inputs {
        assert_eq!(translate(html), expected, "input: {html:?}");
    }
}

/// A single row with two cells becomes the header row of a two-column table.
/// The inputs cover mixed `<th>`/`<td>` cells, a row missing its closing `</tr>`,
/// and cells placed directly inside `<table>`.
#[test]
fn single_row_table() {
    let expected = "| col1 | col2 |\n| ---- | ---- |";
    let inputs = [
        "<table><tr><th>  col1 </th><td>col2  </td></tr></table>",
        "<table><tr><td>  col1 </td><td>col2  </td></table>",
        "<table><td>  col1 </td><td>col2  </td></table>",
    ];

    for html in inputs {
        assert_eq!(translate(html), expected, "input: {html:?}");
    }
}

/// A `<caption>` is emitted as `__caption__` on the line above the table, both
/// when it precedes the cells in the markup and when it follows them.
#[test]
fn table_with_caption() {
    let expected = "__Hello__\n| col1 | col2 |\n| ---- | ---- |";
    let inputs = [
        "<table><caption>Hello</caption><tr><th>  col1 </th><td>col2  </td></tr></table>",
        "<table><th>  col1 </th><td>col2  </td><caption>Hello</caption></table>",
    ];

    for html in inputs {
        assert_eq!(translate(html), expected, "input: {html:?}");
    }
}

/// A literal `|` inside a cell is written as `\|` so it cannot be read as a
/// column delimiter.
#[test]
fn table_pipe_is_escaped() {
    let markdown = translate("<table><tr><td>A|B</td></tr></table>");
    let want = "| A\\|B |\n| ---- |";
    assert_eq!(markdown, want);
}

/// Every column is padded to the width of its widest cell, and the separator
/// row uses the same widths.
#[test]
fn table_pads_cells() {
    // The trailing `\` joins the lines; each literal is still a single line of text.
    let markup = "<table>\
                  <tr><td>abc</td><td>def</td><td>ghi</td></tr>\
                  <tr><td>abc1</td><td>def123</td><td>ghi1234567</td></tr>\
                  <tr><td>a</td><td>def1234</td><td>c</td></tr>\
                  </table>";
    let want = "| abc  | def     | ghi        |\n\
                | ---- | ------- | ---------- |\n\
                | abc1 | def123  | ghi1234567 |\n\
                | a    | def1234 | c          |";

    let markdown = translate(markup);
    assert_eq!(markdown, want);
}

/// A table nested inside a cell contributes only its text to that cell; no
/// inner table markup appears in the output.
#[test]
fn nested_tables_are_flattened() {
    let markdown = translate(
        "<table><tr><td><table><tr><td>nested</td></tr></table></td><td>abc</td></tr></table>",
    );
    let want = "| nested | abc |\n| ------ | --- |";
    assert_eq!(markdown, want);
}

/// Exercises a table whose rows differ in cell count and whose cells hold
/// inline and block elements.
///
/// Per the expected output: the table is as wide as its longest row, with
/// missing cells left blank; the multi-line header text collapses to `C O L2`;
/// bold, italic, link and image cells use their Markdown forms; the list and
/// heading cells are reduced to their text; the `<hr>` cell is empty.
#[test]
fn table_supports_inline_tags_and_mismatched_rows() {
    let markup = r#"
      <table>
        <thead>
          <tr>
            <th>COL1</th>
            <th>C
            O
            L2</th>
          </tr>
        </thead>
        <tbody>
          <tr>
            <th><b>b</b></th>
            <td><i>i</i></td>
            <td><a href="link">a</a></td>
            <td><img src="file"></td>
          </tr>
          <tr>
            <th><ul><li>list</li><li></li></ul></th>
            <td><hr></td>
            <td><h1>h1</h1></td>
          </tr>
        </tbody>
      </table>
    "#;
    // One source line per rendered table row; the trailing `\` joins them.
    let want = "| COL1  | C O L2 |           |           |\n\
                | ----- | ------ | --------- | --------- |\n\
                | **b** | _i_    | [a](link) | ![](file) |\n\
                | list  |        | h1        |           |";

    let markdown = translate(markup);
    assert_eq!(markdown, want);
}

/// Empty `<td>` elements still occupy their column and are rendered as
/// blank padding of the column's width.
#[test]
fn table_empty_cells() {
    // The trailing `\` joins the lines; each literal is still a single line of text.
    let markup = "<table>\
                  <tr><td>abc</td><td>def</td><td>ghi</td></tr>\
                  <tr><td></td><td></td><td>ghi1234567</td></tr>\
                  <tr><td>abc1</td><td>def1234</td><td>c</td></tr>\
                  </table>";
    let want = "| abc  | def     | ghi        |\n\
                | ---- | ------- | ---------- |\n\
                |      |         | ghi1234567 |\n\
                | abc1 | def1234 | c          |";

    let markdown = translate(markup);
    assert_eq!(markdown, want);
}

/// A table placed inside a list item after a paragraph: the output starts with
/// the `* foo` bullet, contains every table row intact, and never has a `|`
/// directly followed by a backslash.
#[test]
fn table_inside_list_item() {
    let markup = r#"
        <ul>
        <li>
          <p>foo</p>
          <table>
            <thead>
              <tr>
                <th>foo</th>
                <th>bar</th>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td>baz</td>
                <td>qux</td>
              </tr>
            </tbody>
          </table>
        </li>
      </ul>
    "#;

    let rendered = translate(markup);

    assert!(!rendered.contains("|\\"));
    assert!(rendered.starts_with("* foo"));
    for row in ["| foo | bar |", "| --- | --- |", "| baz | qux |"] {
        assert!(rendered.contains(row), "missing {row:?} in {rendered:?}");
    }
}