flowmark 0.3.1 - Docs.rs

//! Parity discrepancy tests: Python flowmark vs Rust flowmark.
//!
//! Each test documents a specific discrepancy found during senior review (2026-02-18)
//! or subsequent stabilization (D17/D18, 2026-05-19). Expected values are derived from
//! Python output and re-verified against the current parity baseline (v0.7.0 as of
//! 2026-05-28; D18 tracks upstream issue #45 which is now released in v0.7.0, so it
//! is exact parity, not an intentional divergence). All 18 discrepancies (D1-D18) are
//! resolved — every test passes.
//!
//! D11 tests invoke both the Python and Rust binaries and compare error output.
//! They require Python flowmark to be installed (e.g., `uv tool install flowmark==0.7.0`).
//!
//! See: docs/project/specs/done/plan-2026-02-18-parity-discrepancies.md
#![allow(clippy::unwrap_used)]

use flowmark::Wrap;
use flowmark::config::ListSpacing;
use flowmark::fill_markdown;
use flowmark::fill_text;

/// Formats `input` as Markdown at width 88 with `ListSpacing::Preserve`.
///
/// This is the baseline configuration; the sibling helpers differ from it in
/// the width, the fourth flag, or the list-spacing mode.
fn fmt(input: &str) -> String {
    let width = 88;
    let spacing = ListSpacing::Preserve;
    fill_markdown(input, true, width, false, false, false, false, None, spacing)
}

/// Formats `input` as Markdown at width 88 with the fourth `fill_markdown` flag
/// enabled (presumably semantic line breaks, judging by this helper's name —
/// confirm against `fill_markdown`'s signature).
fn fmt_semantic(input: &str) -> String {
    let width = 88;
    let spacing = ListSpacing::Preserve;
    fill_markdown(input, true, width, true, false, false, false, None, spacing)
}

/// Formats `input` as Markdown at a caller-chosen `width`, otherwise using the
/// same flags as `fmt` and `ListSpacing::Preserve`.
fn fmt_width(input: &str, width: usize) -> String {
    let spacing = ListSpacing::Preserve;
    fill_markdown(input, true, width, false, false, false, false, None, spacing)
}

/// Same configuration as `fmt_semantic`, but at a caller-chosen `width`.
///
/// The leading underscore keeps the compiler quiet while no test in view uses it.
fn _fmt_semantic_width(input: &str, width: usize) -> String {
    let spacing = ListSpacing::Preserve;
    fill_markdown(input, true, width, true, false, false, false, None, spacing)
}

/// Formats `input` with the same flags as `fmt_semantic`, but with
/// `ListSpacing::Tight`.
fn fmt_tight(input: &str) -> String {
    let width = 88;
    let spacing = ListSpacing::Tight;
    fill_markdown(input, true, width, true, false, false, false, None, spacing)
}

/// Formats `input` with the same flags as `fmt_semantic`, but with
/// `ListSpacing::Loose`.
fn fmt_loose(input: &str) -> String {
    let width = 88;
    let spacing = ListSpacing::Loose;
    fill_markdown(input, true, width, true, false, false, false, None, spacing)
}

/// Wraps `input` as plain text at width 88 via `fill_text`.
fn fmt_plaintext(input: &str) -> String {
    // Match Python: Wrap::Wrap (replace_whitespace=false) with html_md_word_split (default).
    // Python's use of html_md_word_splitter in plaintext mode is likely a bug (fmr-5u8i).
    let width = 88;
    let empty = "";
    fill_text(input, Wrap::Wrap, width, empty, empty, 0, None)
}

// =============================================================================
// D1: Plaintext mode collapses code blocks (fmr-n69j)
// Python's plaintext mode preserves code fence structure.
// =============================================================================

#[test]
fn test_d1_plaintext_preserves_code_fences() {
    // The fenced block exactly as it must reappear in the plaintext output.
    let fenced_block = "```javascript\n// This is a code block\nvar x = 5;\n```";
    let source = format!("Some text.\n\n{fenced_block}\n\nMore text.\n");
    let result = fmt_plaintext(&source);
    assert!(
        result.contains(fenced_block),
        "D1: Plaintext mode should preserve code fence structure, got:\n{result}"
    );
}

#[test]
fn test_d1_plaintext_preserves_empty_code_block() {
    // The block under test is fenced but has no language tag; its body lines
    // must stay on separate lines rather than being reflowed into one.
    let input = "Before.\n\n```\nThis is\nanother.\n```\n\nAfter.\n";
    let result = fmt_plaintext(input);
    assert!(
        result.contains("```\nThis is\nanother.\n```"),
        "D1: Plaintext mode should preserve a code fence without a language tag, got:\n{result}"
    );
}

// =============================================================================
// D2: Plaintext mode word splitting (fmr-fzth)
// Python's plaintext mode uses html_md_word_splitter which treats markdown
// links as atomic constructs. This is likely a bug in Python (fmr-5u8i) —
// plaintext mode should use simple_word_splitter — but we match the behavior
// for parity.
// =============================================================================

#[test]
fn test_d2_plaintext_treats_markdown_links_as_atomic() {
    // Python's plaintext mode uses html_md_word_splitter, so a markdown link is
    // one indivisible token and must survive wrapping unbroken.
    let link = "[St. John's Beaumont School](https://en.wikipedia.org/wiki/St_John%27s_Beaumont_School)";
    let source = format!("The school is {link} in the area.\n");
    let result = fmt_plaintext(&source);

    // The link is still present as a single contiguous token (matching Python).
    assert!(
        result.contains(link),
        "D2: Plaintext mode should treat markdown links as atomic (matching Python), got:\n{result}"
    );

    // The paragraph is too long for one line, so wrapping must have happened.
    let line_count = result.lines().count();
    assert!(
        line_count >= 2,
        "D2: Long plaintext with link should wrap to multiple lines, got:\n{result}"
    );
}

// =============================================================================
// D3: Narrow width wraps differently around <sup> tags (fmr-bzra)
// Original discrepancy found at width 60 with 4-space list item indent
// (effective width 56). Test uses width 56 without indent to match
// the effective wrapping behavior.
// =============================================================================

#[test]
fn test_d3_sup_tag_wrapping_at_width_56() {
    // No 4-space indent here (dedent_input=true would strip it anyway);
    // width 56 stands in for the effective width of a list item at width 60.
    let input = "wb\\+ mode (binary read/write), automatically deleted when closed or on process termination.<sup>19</sup> While convenient, POSIX notes potential permission issues and recommends mkstemp followed by fdopen for multithreaded apps to avoid leaking file descriptors.<sup>59</sup>\n";
    // Reference output from Python (indent stripped, effective width 56): 6 lines.
    let expected = "wb\\+ mode (binary read/write), automatically\ndeleted when closed or on process\ntermination.<sup>19</sup> While convenient, POSIX\nnotes potential permission issues and recommends\nmkstemp followed by fdopen for multithreaded apps\nto avoid leaking file descriptors.<sup>59</sup>\n";
    let result = fmt_width(input, 56);

    // Both <sup> tags must come through intact.
    let keeps_sup_tags =
        ["<sup>19</sup>", "<sup>59</sup>"].iter().all(|tag| result.contains(*tag));
    assert!(keeps_sup_tags, "D3: <sup> tags should be preserved in output, got:\n{result}");

    // Only the line count is compared against Python, not the exact text.
    let rust_line_count = result.trim_end().lines().count();
    let python_line_count = expected.trim_end().lines().count();
    assert_eq!(
        rust_line_count,
        python_line_count,
        "D3: Width 56 wrapping should produce same number of lines.\nRust ({} lines):\n{}\nPython ({} lines):\n{}",
        rust_line_count,
        result,
        python_line_count,
        expected,
    );
}

// =============================================================================
// D4: Tight list spacing with nested sublists (fmr-r9k6)
// Python's tight mode adds blank lines between items when any item has sublists,
// but keeps sublists themselves tight. Within-item spacing is loose only when
// the item's sublist has deeper nesting.
// =============================================================================

#[test]
fn test_d4_tight_nested_lists_match_python() {
    // Expected Python layout:
    //   - blank after "Level 1a" (its sublist nests deeper),
    //   - tight between "Level 2a" and "Level 3a",
    //   - blank before "Level 1b",
    //   - tight between "Level 1b" and "Level 2b" (flat sublist).
    let expected = "- Level 1a\n\n  - Level 2a\n    - Level 3a\n\n- Level 1b\n  - Level 2b\n";
    let source = "- Level 1a\n  - Level 2a\n    - Level 3a\n- Level 1b\n  - Level 2b\n";
    let result = fmt_tight(source);
    assert_eq!(
        result, expected,
        "D4: Tight nested lists should match Python behavior.\nGot:\n{result}"
    );
}

#[test]
fn test_d4_tight_simple_sublists() {
    // Flat sublists: Python separates the top-level items with a blank line
    // while each item stays tight against its own sublist.
    let expected = "- A\n  - B\n\n- C\n  - D\n";
    let result = fmt_tight("- A\n  - B\n- C\n  - D\n");
    assert_eq!(
        result, expected,
        "D4: Tight simple sublists should match Python.\nGot:\n{result}"
    );
}

#[test]
fn test_d4_tight_ordered_sublists() {
    // Python: tight inside an item (Ordered 1 → Sub 1), blank between items.
    let expected = "1. Ordered 1\n   1. Sub 1\n   2. Sub 2\n\n2. Ordered 2\n";
    let source = "1. Ordered 1\n   1. Sub 1\n   2. Sub 2\n2. Ordered 2\n";
    let result = fmt_tight(source);
    assert_eq!(
        result, expected,
        "D4: Tight ordered sublists should match Python.\nGot:\n{result}"
    );
}

// =============================================================================
// D5: Loose list spacing missing blank lines in footnote embedded lists (fmr-vpg4)
// Python adds blank lines after footnote list items that Rust omits.
// =============================================================================

#[test]
fn test_d5_loose_footnote_list_items() {
    let source = "[^217]: Testing - : Is Ketamine Contraindicated?\n    - REBEL EM - more words,\n      <https://rebelem.com/test>\n\n[^multiline]: Another footnote.\n";
    let result = fmt_loose(source);
    // Python leaves a blank line between the footnote's list item and the
    // next footnote definition.
    let has_blank_after_item = result.contains("<https://rebelem.com/test>\n\n[^multiline]:");
    assert!(
        has_blank_after_item,
        "D5: Loose mode should add blank line after footnote list items, got:\n{result}"
    );
}

// =============================================================================
// D6: Nested blockquotes get extra blank separator lines (fmr-3i50)
// Rust inserts "> " blank lines between nested blockquote levels.
// =============================================================================

#[test]
fn test_d6_nested_blockquotes_no_extra_blanks() {
    // Already in canonical form: formatting must return it unchanged.
    let quoted = "> Level 1\n> > Level 2\n> > > Level 3\n";
    let result = fmt(quoted);
    assert_eq!(result, quoted, "D6: Nested blockquotes should not have extra blank lines");
}

#[test]
fn test_d6_two_level_blockquote() {
    let input = "> Outer\n> > Inner\n";
    let result = fmt(input);
    // A "blank" quote line consists only of `>` markers and whitespace. Checking
    // line by line catches both ">" and "> " (the trailing-space form), which a
    // plain substring search for ">\n>" would miss.
    let has_blank_quote_line = result
        .lines()
        .any(|line| line.contains('>') && line.chars().all(|c| c == '>' || c.is_whitespace()));
    assert!(
        !has_blank_quote_line,
        "D6: Should not have blank '> ' line between blockquote levels, got:\n{result}"
    );
}

#[test]
fn test_d6_nested_blockquote_preserves_blank_separator() {
    // A blank `>` line between the outer and inner quote in the source is kept
    // by Python (rendered as "> " with a trailing space); Rust must match.
    let expected = "> Outer quote.\n> \n> > Inner quote.\n";
    let source = "> Outer quote.\n>\n> > Inner quote.\n";
    let result = fmt(source);
    assert_eq!(
        result, expected,
        "D6: Blank `>` separator between blockquote levels should be preserved.\nGot:\n{result}"
    );
}

// =============================================================================
// D7: Footnote body continuation list items collapsed onto one line (fmr-81j7)
// =============================================================================

#[test]
fn test_d7_footnote_with_list_items() {
    let source = "[^3]: Footnote with a list:\n    - Item 1\n    - Item 2\n    - Item 3\n";
    let result = fmt_semantic(source);
    // Python keeps each list item on its own line, so each checked item must
    // be followed directly by a newline.
    for item_line in ["- Item 1\n", "- Item 2\n"].iter() {
        assert!(
            result.contains(*item_line),
            "D7: Footnote list items should be on separate lines, got:\n{result}"
        );
    }
}

#[test]
fn test_d7_footnote_preamble_then_list() {
    let result = fmt("[^3]: Footnote with a list:\n    - Item 1\n    - Item 2\n");
    // The preamble text must not be joined onto the first list item.
    let collapsed = result.contains("list: - Item");
    assert!(
        !collapsed,
        "D7: Footnote preamble should not collapse with list items, got:\n{result}"
    );
}

// =============================================================================
// D8: Footnote body blockquote continuation collapsed onto first line (fmr-xcr9)
// =============================================================================

#[test]
fn test_d8_footnote_with_blockquote() {
    let source = "[^4]: Footnote with blockquote:\n    > This is quoted inside footnote.\n";
    let result = fmt(source);

    // The quote marker must survive...
    let keeps_quote = result.contains("> This is quoted");
    assert!(keeps_quote, "D8: Footnote blockquote should be on its own line, got:\n{result}");

    // ...and must not have been pulled up onto the preamble line.
    let collapsed = result.contains("blockquote: > This");
    assert!(
        !collapsed,
        "D8: Footnote blockquote should not be collapsed onto preamble, got:\n{result}"
    );
}

// =============================================================================
// D9: Empty/whitespace input produces no output (fmr-dihn)
// Python always outputs at least a trailing newline.
// =============================================================================

#[test]
fn test_d9_empty_input_outputs_newline() {
    // Python always emits at least a trailing newline, even for empty input.
    let empty = "";
    assert_eq!(fmt(empty), "\n", "D9: Empty input should produce a trailing newline");
}

#[test]
fn test_d9_whitespace_input_outputs_newline() {
    // Input made only of spaces and newlines collapses to a single newline.
    let blank = "   \n  \n";
    assert_eq!(fmt(blank), "\n", "D9: Whitespace-only input should produce a trailing newline");
}

#[test]
fn test_d9_single_newline_input() {
    // A lone newline must come back as a lone newline.
    let newline = "\n";
    assert_eq!(fmt(newline), "\n", "D9: Single newline input should produce a trailing newline");
}

// =============================================================================
// D10: HTML entities decoded instead of preserved (fmr-gocw)
// Comrak decodes &amp; to &, &lt; to <, etc. Python preserves them.
// =============================================================================

#[test]
fn test_d10_html_entities_preserved() {
    // Entities must be emitted exactly as written, not decoded.
    let entities = "&amp; &lt; &gt; &quot;\n";
    assert_eq!(
        fmt(entities),
        "&amp; &lt; &gt; &quot;\n",
        "D10: HTML entities should be preserved as-is"
    );
}

#[test]
fn test_d10_html_entity_in_paragraph() {
    let result = fmt("The value is &gt; 5 and &lt; 10.\n");
    // Both entities must still be present in their encoded form.
    let keeps_entities = ["&gt;", "&lt;"].iter().all(|entity| result.contains(*entity));
    assert!(
        keeps_entities,
        "D10: HTML entities should be preserved in paragraphs, got:\n{result}"
    );
}

// =============================================================================
// D11: CLI error handling parity (fmr-8ixa)
// Verify that Rust CLI error messages match Python's error messages.
// Requires the Python flowmark binary to be available at the expected path.
// =============================================================================

#[cfg(feature = "cli")]
/// Runs `bin` with `args` and returns its stderr (trailing whitespace trimmed)
/// together with its exit code.
///
/// The exit code is reported as `-1` when the process has none. Panics if the
/// binary cannot be launched.
fn run_cli(bin: &str, args: &[&str]) -> (String, i32) {
    let mut command = std::process::Command::new(bin);
    command.args(args);
    let output = match command.output() {
        Ok(output) => output,
        Err(e) => panic!("Failed to run {bin}: {e}"),
    };
    let exit_code = output.status.code().unwrap_or(-1);
    let stderr = String::from_utf8_lossy(&output.stderr);
    (stderr.trim_end().to_string(), exit_code)
}

#[cfg(feature = "cli")]
/// Runs `bin` with `args`, feeds `stdin` to its standard input, and returns its
/// stderr (trailing whitespace trimmed) together with its exit code (`-1` when
/// the process has none). Panics if the binary cannot be launched.
fn run_cli_stdin(bin: &str, args: &[&str], stdin: &str) -> (String, i32) {
    use std::io::Write;
    use std::process::{Command, Stdio};

    let spawned = Command::new(bin)
        .args(args)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn();
    let mut child = match spawned {
        Ok(child) => child,
        Err(e) => panic!("Failed to run {bin}: {e}"),
    };

    // The child may reject its arguments and exit before reading stdin (e.g.
    // `--inplace -`), closing the read end of the pipe. The resulting broken-pipe write
    // is expected and benign here — we assert on the child's stderr and exit code, not on
    // the write succeeding — so ignore the error instead of `.unwrap()`ing it (which
    // races against process teardown and flakes intermittently, especially on Windows).
    if let Some(mut pipe) = child.stdin.take() {
        let _ = pipe.write_all(stdin.as_bytes());
    }

    let output = child.wait_with_output().unwrap();
    let exit_code = output.status.code().unwrap_or(-1);
    let stderr = String::from_utf8_lossy(&output.stderr);
    (stderr.trim_end().to_string(), exit_code)
}

#[cfg(feature = "cli")]
/// Command name used to launch the Python flowmark CLI; it is given without a
/// directory, so it is resolved through `PATH`.
fn python_flowmark() -> &'static str {
    const PYTHON_BIN: &str = "flowmark";
    PYTHON_BIN
}

#[cfg(feature = "cli")]
/// Path to the Rust `flowmark` binary under test.
///
/// Prefers `CARGO_BIN_EXE_flowmark`, which Cargo sets when compiling integration
/// tests to the absolute path of the binary it just built. That path is correct
/// for any profile (`--release`), custom target directory, or platform executable
/// suffix. If the variable is absent at compile time, falls back to the
/// conventional debug location under the manifest directory.
fn rust_flowmark() -> String {
    if let Some(exe) = option_env!("CARGO_BIN_EXE_flowmark") {
        return exe.to_string();
    }
    let root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    root.join("target/debug/flowmark").to_string_lossy().to_string()
}

#[test]
#[cfg(feature = "cli")]
fn test_d11_no_args_error_matches_python() {
    // Both CLIs are invoked with no arguments at all.
    let no_args: [&str; 0] = [];
    let (python_stderr, python_exit) = run_cli(python_flowmark(), &no_args);
    let (rust_stderr, rust_exit) = run_cli(&rust_flowmark(), &no_args);
    assert_eq!(rust_stderr, python_stderr, "D11: No-args error message should match Python");
    assert_eq!(rust_exit, python_exit, "D11: No-args exit code should match Python");
}

#[test]
#[cfg(feature = "cli")]
fn test_d11_auto_no_args_error_matches_python() {
    // `--auto` given without any file arguments.
    let args = ["--auto"];
    let (python_stderr, python_exit) = run_cli(python_flowmark(), &args);
    let (rust_stderr, rust_exit) = run_cli(&rust_flowmark(), &args);
    assert_eq!(rust_stderr, python_stderr, "D11: --auto no-args error should match Python");
    assert_eq!(rust_exit, python_exit, "D11: --auto no-args exit code should match Python");
}

#[test]
#[cfg(feature = "cli")]
fn test_d11_inplace_stdin_error_matches_python() {
    // `--inplace` combined with `-` (stdin) as the input.
    let args = ["--inplace", "-"];
    let piped_input = "hello\n";
    let (python_stderr, python_exit) = run_cli_stdin(python_flowmark(), &args, piped_input);
    let (rust_stderr, rust_exit) = run_cli_stdin(&rust_flowmark(), &args, piped_input);
    assert_eq!(rust_stderr, python_stderr, "D11: --inplace stdin error should match Python");
    assert_eq!(rust_exit, python_exit, "D11: --inplace stdin exit code should match Python");
}

#[test]
#[cfg(feature = "cli")]
fn test_d11_output_multiple_files_error_matches_python() {
    // A single `-o` output file combined with two input files.
    let args = ["-o", "out.md", "/dev/null", "/dev/null"];
    let (python_stderr, python_exit) = run_cli(python_flowmark(), &args);
    let (rust_stderr, rust_exit) = run_cli(&rust_flowmark(), &args);
    assert_eq!(rust_stderr, python_stderr, "D11: multi-file output error should match Python");
    assert_eq!(rust_exit, python_exit, "D11: multi-file output exit code should match Python");
}

#[test]
#[cfg(feature = "cli")]
fn test_d11_nonexistent_file_error_format() {
    let missing_file = "nonexistent.md";
    let py_err = run_cli(python_flowmark(), &[missing_file]).0;
    let rs_err = run_cli(&rust_flowmark(), &[missing_file]).0;

    // Python: "Error: [Errno 2] No such file or directory: 'nonexistent.md'" (exit 2)
    // Rust:   "Error: Path not found: nonexistent.md" (exit 1)
    // A byte-for-byte match is impossible ([Errno 2] is a Python-ism), so only the
    // shared shape is checked: an "Error:" prefix and a mention of the filename.
    // Exit codes are deliberately not compared.
    let rust_has_prefix = rs_err.starts_with("Error:");
    assert!(
        rust_has_prefix,
        "D11: Rust nonexistent file error should start with 'Error:', got: {rs_err}"
    );
    let rust_names_file = rs_err.contains(missing_file);
    assert!(rust_names_file, "D11: Rust error should mention the filename, got: {rs_err}");

    let python_has_prefix = py_err.starts_with("Error:");
    assert!(
        python_has_prefix,
        "D11: Python nonexistent file error should start with 'Error:', got: {py_err}"
    );
    let python_names_file = py_err.contains(missing_file);
    assert!(python_names_file, "D11: Python error should mention the filename, got: {py_err}");
}

// =============================================================================
// D12: Paragraph before code fence — extra blank line inserted (P6)
// Python preserves tight paragraph→code fence transition (no blank line).
// Rust inserts a blank line between the paragraph and the opening code fence.
// Root cause: render_block_children() suppress_for_tight doesn't handle
// paragraph→CodeBlock transitions.
// Discovered: Real-world corpus test (ai-trade-arena docs/, 454 instances).
// =============================================================================

#[test]
fn test_d12_paragraph_before_code_fence_tight() {
    // The paragraph sits directly on top of the fence (no blank line). Python
    // leaves this untouched, so the expected output equals the source.
    let tight_doc = "**Configuration Options**:\n```typescript\n{\n  minTime: number,\n}\n```\n";
    let result = fmt(tight_doc);
    assert_eq!(
        result, tight_doc,
        "D12/P6: Should not insert blank line before code fence when source is tight.\nGot:\n{result}"
    );
}

#[test]
fn test_d12_inline_code_paragraph_before_code_fence() {
    // Paragraph that ends in inline code and is tight against a fence; the
    // expected output equals the source.
    let tight_doc = "Add to root `package.json`:\n```json\n{\n  \"scripts\": {}\n}\n```\n";
    let result = fmt(tight_doc);
    assert_eq!(
        result, tight_doc,
        "D12/P6: Inline-code paragraph should stay tight before code fence.\nGot:\n{result}"
    );
}

#[test]
fn test_d12_multiple_tight_code_fences() {
    // Two tight paragraph→fence transitions in one document; the expected
    // output equals the source.
    let tight_doc =
        "First block:\n```bash\necho hello\n```\n\nSecond block:\n```python\nprint(\"hi\")\n```\n";
    let result = fmt(tight_doc);
    assert_eq!(
        result, tight_doc,
        "D12/P6: Multiple tight code fences should not get extra blank lines.\nGot:\n{result}"
    );
}

// =============================================================================
// D12b: Paragraph before code fence in mixed loose/tight list items (P6)
// When a list has both tight and loose items (some items have blank lines
// before code blocks, some don't), comrak treats the entire list as loose.
// Python preserves the tight transitions (no blank line) even in a loose list.
// Rust adds blank lines before ALL code fences in the list.
// Verified byte-by-byte against Python flowmark v0.6.4 (2026-02-19).
// =============================================================================

// Task list in which only the second item ("Launch all:") has a blank line
// before its code fence. The expected output keeps every other item's fence
// tight against its paragraph while separating the items with blank lines.
#[test]
fn test_d12b_mixed_loose_tight_list_code_fences() {
    // When one list item has a blank line before its code block (making the list
    // "loose"), Rust adds blank lines before ALL code fences. Python doesn't.
    // Source: items are adjacent, with no blank lines between them.
    let input = "\
- [ ] Create output:
  ```bash
  cd web
  ```
- [ ] Launch all:

  ```bash
  cd web
  pnpm batch
  ```

  Note: If key not available, skip.
- [ ] Monitor:
  ```bash
  watch ls
  ```
- [ ] Check failures:
  ```bash
  echo check
  ```
";
    // Expected: one blank line between items; within-item spacing as in the source.
    let python_output = "\
- [ ] Create output:
  ```bash
  cd web
  ```

- [ ] Launch all:

  ```bash
  cd web
  pnpm batch
  ```

  Note: If key not available, skip.

- [ ] Monitor:
  ```bash
  watch ls
  ```

- [ ] Check failures:
  ```bash
  echo check
  ```
";
    let result = fmt(input);
    assert_eq!(
        result, python_output,
        "D12b/P6: Mixed loose/tight list should not add blank lines before tight code fences.\nGot:\n{result}"
    );
}

// =============================================================================
// D16: Adjacent empty code blocks — extra blank line (Bug 4)
// When a ```` (4-backtick) code block is followed by ``` blocks, comrak
// normalizes to ``` and Rust adds an extra blank line between them.
// Verified against Python flowmark v0.6.4 (2026-02-19).
// =============================================================================

// A fence opened with three backticks and followed by a four-backtick line,
// then two bare ``` lines. The expected output has the four-backtick line
// rewritten to ``` and the two bare fences on adjacent lines.
#[test]
fn test_d16_adjacent_empty_code_blocks() {
    // Four-backtick fence followed by empty code blocks.
    // Both normalize ```` to ```, but Rust adds an extra blank line between
    // the adjacent empty ``` blocks.
    // Source: note the four-backtick line after the `git commit` command.
    let input = "\
Emergency commits:

```bash
git commit --no-verify -m \"WIP: emergency fix\"
````

```

```

Only use `--no-verify` when absolutely necessary.
";
    // Expected: three-backtick fence lines throughout, and no blank line
    // between the two bare ``` lines.
    let python_output = "\
Emergency commits:

```bash
git commit --no-verify -m \"WIP: emergency fix\"
```

```
```

Only use `--no-verify` when absolutely necessary.
";
    let result = fmt(input);
    assert_eq!(
        result, python_output,
        "D16: Adjacent empty code blocks should not have extra blank line between them.\nGot:\n{result}"
    );
}

// =============================================================================
// D17: Thematic break spacing — extra blank line around `* * *` (fmr-thbreak)
// Python/marko preserves the source's tight spacing around a thematic break:
// when a thematic break is adjacent to another block with no blank line in the
// source, the output stays tight. Comrak's renderer instead forces blank lines
// on both sides of every thematic break. Mirrors the existing tight-transition
// handling for HTML comments (Rule 1/2), paragraph→list (Rule 3), and
// paragraph→code (Rule 4) in render_block_children.
// Found via corpus parity check (2026-05-19).
// =============================================================================

#[test]
fn test_d17_thematic_break_before_heading_tight() {
    // The heading follows `* * *` with no blank line; the expected output
    // equals the source.
    let tight_doc = "text\n\n* * *\n## Heading\n\nmore\n";
    let result = fmt(tight_doc);
    assert_eq!(
        result, tight_doc,
        "D17: tight thematic break → heading should stay tight.\nGot:\n{result}"
    );
}

#[test]
fn test_d17_thematic_break_after_paragraph_tight() {
    // `* * *` follows the paragraph with no blank line; the expected output
    // equals the source.
    let tight_doc = "para\n* * *\n\nb\n";
    let result = fmt(tight_doc);
    assert_eq!(
        result, tight_doc,
        "D17: tight paragraph → thematic break should stay tight.\nGot:\n{result}"
    );
}

#[test]
fn test_d17_thematic_break_then_paragraph_tight() {
    // A paragraph follows `* * *` with no blank line; the expected output
    // equals the source.
    let tight_doc = "a\n\n* * *\npara\n\nb\n";
    let result = fmt(tight_doc);
    assert_eq!(
        result, tight_doc,
        "D17: tight thematic break → paragraph should stay tight.\nGot:\n{result}"
    );
}

#[test]
fn test_d17_thematic_break_consecutive_tight() {
    // Two `* * *` lines back to back; the expected output equals the source.
    let tight_doc = "a\n\n* * *\n* * *\n\nb\n";
    let result = fmt(tight_doc);
    assert_eq!(
        result, tight_doc,
        "D17: consecutive tight thematic breaks should stay tight.\nGot:\n{result}"
    );
}

#[test]
fn test_d17_thematic_break_loose_preserved() {
    // Blank lines already surround `* * *` in the source, and they must stay;
    // the expected output equals the source.
    let loose_doc = "a\n\n* * *\n\n## Heading\n\nb\n";
    let result = fmt(loose_doc);
    assert_eq!(
        result, loose_doc,
        "D17: loose thematic break spacing should be preserved.\nGot:\n{result}"
    );
}

// =============================================================================
// D18: Reference-link normalization (upstream flowmark issue #45)
// A reference link whose text equals its normalized label must render as the
// unambiguous collapsed form `[text][]`, NOT the fragile shortcut `[text]`
// (which merges with a following `(...)` or `[...]`, changing/dropping links).
// When text != normalized label, the full form `[text][label]` is used.
//
// This adopts the upstream fix (commit 0af9e24), which first shipped after Python
// flowmark v0.6.5. Relative to v0.6.5 — which still emits the buggy shortcut form —
// this was an intentional divergence; against the current v0.7.0 parity baseline it
// is exact parity. Originally verified against Python main (v0.6.6.dev) and the
// upstream tests/test_reference_links.py spec.
// =============================================================================

/// Formats `body` followed by a blank line and the link reference definition
/// `[foo]: https://example.com/x`, using the default `fmt` configuration.
fn fmt_ref(body: &str) -> String {
    let mut document = String::from(body);
    document.push_str("\n\n[foo]: https://example.com/x\n");
    fmt(&document)
}

#[test]
fn test_d18_shortcut_ref_normalized_to_collapsed() {
    // Shortcut `[foo]` (text == label) becomes collapsed `[foo][]`, not the
    // fragile shortcut form.
    let expected = "Use [foo][]\n\n[foo]: https://example.com/x\n";
    let result = fmt_ref("Use [foo]");
    assert_eq!(result, expected);
}

#[test]
fn test_d18_collapsed_ref_preserved() {
    // An already-collapsed reference `[foo][]` is left as it is.
    let expected = "Use [foo][]\n\n[foo]: https://example.com/x\n";
    let result = fmt_ref("Use [foo][]");
    assert_eq!(result, expected);
}

#[test]
fn test_d18_full_ref_label_equals_text_collapsed() {
    // Full reference `[foo][foo]` (text == label) shortens to `[foo][]`.
    let expected = "Use [foo][]\n\n[foo]: https://example.com/x\n";
    let result = fmt_ref("Use [foo][foo]");
    assert_eq!(result, expected);
}

#[test]
fn test_d18_full_ref_distinct_label_preserved() {
    // `[bar][foo]` (text != label) must remain a full reference.
    let expected = "Use [bar][foo]\n\n[foo]: https://example.com/x\n";
    let result = fmt_ref("Use [bar][foo]");
    assert_eq!(result, expected);
}

#[test]
fn test_d18_uppercase_shortcut_expands_to_full() {
    // `[Unreleased]` with definition `[unreleased]`: the text "Unreleased"
    // differs from the normalized label "unreleased", so the full form
    // `[Unreleased][unreleased]` is expected.
    let source = "## [Unreleased]\n\n[unreleased]: https://example.com/c\n";
    let result = fmt(source);
    assert_eq!(result, "## [Unreleased][unreleased]\n\n[unreleased]: https://example.com/c\n");
}

#[test]
fn test_d18_label_normalized_to_lowercase() {
    // `[Foo]` with definition `[Foo]:` renders as `[Foo][foo]`: the link label is
    // normalized to lowercase. The definition line is lowercased too, matching
    // Python (see D20).
    let expected = "[Foo][foo]\n\n[foo]: https://example.com/x\n";
    let result = fmt("[Foo]\n\n[Foo]: https://example.com/x\n");
    assert_eq!(result, expected);
}

#[test]
fn test_d18_collapsed_ref_label_with_spaces_and_apostrophe() {
    // Reviewer's reproducer: a collapsed reference whose label has spaces and an
    // apostrophe must round-trip cleanly (no leaked PUA marker, no inline-link
    // fallback). The full form is expected because the text "St. John's School"
    // differs from the lowercased label "st. john's school".
    let expected = "See [St. John's School][st. john's school] here.\n\n[st. john's school]: https://example.com/x\n";
    let source = "See [St. John's School][] here.\n\n[St. John's School]: https://example.com/x\n";
    let result = fmt(source);
    assert_eq!(result, expected);
}

#[test]
fn test_d18_shortcut_ref_label_with_spaces_and_apostrophe() {
    // Shortcut-form variant of the spaces-and-apostrophe label case; the
    // expected output is the full reference form with a lowercased label.
    let expected = "See [St. John's School][st. john's school] here.\n\n[st. john's school]: https://example.com/x\n";
    let source = "See [St. John's School] here.\n\n[St. John's School]: https://example.com/x\n";
    let result = fmt(source);
    assert_eq!(result, expected);
}

#[test]
fn test_d18_collapsed_ref_label_lowercase_with_spaces_emits_collapsed() {
    // The link text already equals the normalized label (all lowercase, same
    // whitespace), so the collapsed form `[text][]` is expected per issue #45,
    // spaces in the label notwithstanding. The expected output equals the source.
    let document = "See [an example][] here.\n\n[an example]: https://example.com/x\n";
    let result = fmt(document);
    assert_eq!(result, document);
}

// =============================================================================
// D19: Reference-image inlining (parity bug surfaced by PR #54)
// Python flowmark always renders reference images as INLINE form:
// `![alt][label]`, `![alt][]`, and shortcut `![alt]` all become
// `![alt](url)` (or `![alt](url "title")`) on render, with the matched
// link reference definition's destination/title substituted in.
// The Rust port was leaking the COMRAK-WORKAROUND1 PUA marker into the
// rendered URL because the Image render branch didn't decode it, and after
// the hex-encoded label fix in v0.7.0 the leak became hex strings like
// `![alt](696d67)`. Fix: inline image references during pre-parse so comrak
// parses them as proper inline images with the actual URL.
// =============================================================================

#[test]
fn test_d19_image_full_ref_inlined() {
    // Full reference image `![alt][img]` is rendered inline with the
    // definition's URL; the definition line itself is kept.
    let expected = "![alt](https://example.com/img.png)\n\n[img]: https://example.com/img.png\n";
    let result = fmt("![alt][img]\n\n[img]: https://example.com/img.png\n");
    assert_eq!(result, expected);
}

#[test]
fn test_d19_image_collapsed_ref_inlined() {
    // Collapsed reference image `![alt][]` is rendered inline.
    let expected = "![alt](https://example.com/img.png)\n\n[alt]: https://example.com/img.png\n";
    let result = fmt("![alt][]\n\n[alt]: https://example.com/img.png\n");
    assert_eq!(result, expected);
}

#[test]
fn test_d19_image_shortcut_ref_inlined() {
    // Shortcut reference image `![alt]` is rendered inline.
    let expected = "![alt](https://example.com/img.png)\n\n[alt]: https://example.com/img.png\n";
    let result = fmt("![alt]\n\n[alt]: https://example.com/img.png\n");
    assert_eq!(result, expected);
}

#[test]
fn test_d19_image_with_title_inlined() {
    // The definition's title is carried over into the inline image.
    let expected = "![alt](https://example.com/img.png \"My title\")\n\n[img]: https://example.com/img.png \"My title\"\n";
    let source = "![alt][img]\n\n[img]: https://example.com/img.png \"My title\"\n";
    let result = fmt(source);
    assert_eq!(result, expected);
}

#[test]
fn test_d19_image_label_with_spaces_inlined() {
    // A multi-word label must resolve to the real URL; neither the PUA marker
    // nor a hex-encoded label may show up in the rendered destination.
    let source = "![Logo][company logo]\n\n[company logo]: https://example.com/logo.png\n";
    let want =
        "![Logo](https://example.com/logo.png)\n\n[company logo]: https://example.com/logo.png\n";
    let got = fmt(source);
    assert_eq!(got, want);
}

#[test]
fn test_d19_badge_pattern_image_inside_link() {
    // GitHub-badge shape: a reference image wrapped in a reference link.
    // Only the inner image is inlined; the enclosing `[...][url]` link keeps
    // its reference form, and both definitions are retained.
    let rendered = fmt(
        "[![alt][img]][url]\n\n[img]: https://example.com/img.png\n[url]: https://example.com/page\n",
    );
    assert_eq!(
        rendered,
        "[![alt](https://example.com/img.png)][url]\n\n[img]: https://example.com/img.png\n[url]: https://example.com/page\n"
    );
}

#[test]
fn test_d19_image_no_def_unchanged() {
    // No matching definition: leave the markdown as-is (comrak will render it
    // as literal text since defs are extracted from comrak's view).
    let label = "missing";
    let input = format!("Some ![alt][{label}] text here.\n");
    let result = fmt(&input);

    // The D19 leak surfaced as the lowercase hex of the label's bytes (e.g.
    // `696d67` for the label `img`). Derive the needle from the label this
    // test actually uses so the guard can fire: `696d67` is not a substring
    // of the hex encoding of `missing`.
    let label_hex: String = label.bytes().map(|b| format!("{b:02x}")).collect();

    assert!(
        !result.contains('\u{F000}') && !result.contains(label_hex.as_str()),
        "no-def image must not leak PUA or hex: {result}"
    );
}

// =============================================================================
// D20: Link reference definition label lowercased on render (PR #57 follow-up)
// Python flowmark stores reference labels in normalized (lowercase) form and
// emits them lowercased on render: `[Logo]: url` -> `[logo]: url`. The Rust
// port was preserving the original case. Both match the same link (CommonMark
// def matching is case-insensitive), but exact-parity requires matching
// Python's output form.
// =============================================================================

#[test]
fn test_d20_def_label_lowercased_on_render() {
    // A mixed-case label is emitted lowercased in two places: the link's
    // label slot (which becomes the full `[text][label]` form) and the
    // definition line. The visible link text "Logo" keeps its case.
    let source = "[Logo][]\n\n[Logo]: https://example.com/logo.png\n";
    let want = "[Logo][logo]\n\n[logo]: https://example.com/logo.png\n";
    let got = fmt(source);
    assert_eq!(got, want);
}

#[test]
fn test_d20_def_label_with_spaces_lowercased() {
    // Same lowercasing rule applied to a label containing a space.
    let rendered = fmt("[Company Logo][]\n\n[Company Logo]: https://example.com/logo.png\n");
    assert_eq!(
        rendered,
        "[Company Logo][company logo]\n\n[company logo]: https://example.com/logo.png\n"
    );
}

#[test]
fn test_d20_def_label_already_lowercase_unchanged() {
    // With an already-lowercase label the document is expected to come back
    // byte-for-byte, so one literal serves as both input and expectation.
    let document = "[link][example]\n\n[example]: https://example.com/page\n";
    assert_eq!(fmt(document), document);
}

#[test]
fn test_d20_def_label_with_title_preserved_lowercased() {
    // Lowercasing applies to the label only: the definition's title
    // ("Welcome home") and the link text ("Page") keep their original case.
    let (source, want) = (
        "[Page][Home]\n\n[Home]: https://example.com \"Welcome home\"\n",
        "[Page][home]\n\n[home]: https://example.com \"Welcome home\"\n",
    );
    assert_eq!(fmt(source), want);
}

#[test]
fn test_d18_shortcut_without_definition_unchanged() {
    // `[bar]` has no definition in the output (only `[foo]` does), so it
    // must stay literal text rather than gaining a trailing `[]`.
    let rendered = fmt_ref("Use [bar]");
    let want = "Use [bar]\n\n[foo]: https://example.com/x\n";
    assert_eq!(rendered, want);
}

#[test]
fn test_d18_multiple_shortcut_refs_on_one_line() {
    // Two shortcut references to the same label on one line: each one is
    // expected to be rewritten to the collapsed `[foo][]` form.
    let rendered = fmt_ref("Both [foo] and [foo] here");
    let want = "Both [foo][] and [foo][] here\n\n[foo]: https://example.com/x\n";
    assert_eq!(rendered, want);
}

#[test]
fn test_d18_collapsed_form_is_idempotent() {
    // Formatting the already-formatted document a second time must not
    // change it: the collapsed form is a fixed point.
    let first_pass = fmt_ref("Use [foo]");
    let second_pass = fmt(&first_pass);
    assert_eq!(
        second_pass, first_pass,
        "D18: collapsed reference output must be idempotent"
    );
}

#[test]
fn test_d18_inline_link_unaffected() {
    // An inline link `[foo](url)` is not a reference link, so the document
    // is expected to round-trip unchanged.
    let document = "See [foo](https://example.com/z) here.\n";
    let rendered = fmt(document);
    assert_eq!(rendered, document);
}

// =============================================================================
// D13: Blockquote blank continuation line indentation (P7)
// Inside blockquotes, blank lines between a list item's text and its child
// content (code block, nested list) must have the list-content indentation.
// Python outputs ">    " (with 4 spaces for numbered list indent).
// Rust was emitting just ">" (bare blockquote marker, no indentation) before the fix.
// Verified byte-by-byte against Python flowmark v0.6.4 (2026-02-19).
// =============================================================================

#[test]
fn test_d13_blockquote_list_code_block_blank_line_indent() {
    // Two numbered list items inside a blockquote, each followed by child
    // content: a fenced code block for the first, a nested bullet for the second.
    let source = "\
> 1. **Copy this file** to a dated version:
>
>    ```
>    template-process.md
>    ```
>
> 2. **Review the previous** for context:
>
>    - Check the section
";
    // Expected bytes: the blank line between an item's text and its child
    // content is `>` followed by four spaces, while the blank line separating
    // the two items stays a bare `>`. The explicit `\n\` endings keep that
    // trailing whitespace visible in the literal.
    let python_output = "\
> 1. **Copy this file** to a dated version:
>    \n\
>    ```
>    template-process.md
>    ```
>
> 2. **Review the previous** for context:
>    \n\
>    - Check the section
";
    let result = fmt(source);
    assert_eq!(
        result, python_output,
        "D13/P7: Blank lines between blockquote list items and children should have list indentation.\nGot:\n{result}"
    );
}

#[test]
fn test_d13_blockquote_list_with_blank_continuation() {
    // A bullet item in a blockquote with a nested numbered list. The blank
    // line between "Rules:" and the first child is expected as `>` plus three
    // spaces; the blank line between the two children stays a bare `>`.
    let python_output =
        "> - Rules:\n>   \n>   1. Look for duplicated code\n>\n>   2. Look for dead code\n";
    let result =
        fmt("> - Rules:\n>\n>   1. Look for duplicated code\n>\n>   2. Look for dead code\n");
    assert_eq!(
        result, python_output,
        "D13/P7: Blank line between blockquote list item and child should have list indent.\nGot:\n{result}"
    );
}

// =============================================================================
// D14: Escaped backtick stripped in table inline code (P8)
// Python preserves \` inside inline code in table cells.
// Rust was stripping the trailing \` escape before the fix.
// Related to P3 (missing ESCAPE_CHARS) but specific to backtick in inline code.
// Discovered: Real-world corpus test (ai-trade-arena docs/, 1 instance).
// =============================================================================

#[test]
fn test_d14_escaped_backtick_in_table_inline_code() {
    // Table row whose second cell holds inline code containing `\`` escapes.
    let result = fmt(
        "| Col1 | Col2 |\n| --- | --- |\n| swallowing | `throw new CLIError(\\`${msg}: ${error.message}\\`)` |\n",
    );
    // The check targets the trailing escape right before the cell's closing
    // pipe, accepting the pipe with or without a separating space.
    let cell_endings = ["\\`)` |", "\\`)`|"];
    let trailing_escape_kept = cell_endings.iter().any(|ending| result.contains(*ending));
    assert!(
        trailing_escape_kept,
        "D14/P8: Escaped backtick at end of inline code in table should be preserved.\nGot:\n{result}"
    );
}

// =============================================================================
// D15: Smart quote conversion after inline code backtick (P9)
// Python's behavior is CONTEXT-SENSITIVE: if the inline code content ends with
// a word character (e.g., `config`'s), the apostrophe IS converted to a smart
// quote (U+2019). If it ends with a non-word char (e.g., `foo()`'s), it stays
// ASCII. This was verified byte-by-byte against Python flowmark v0.6.4.
// Discovered: Real-world corpus test (2026-02-19).
// =============================================================================

fn fmt_auto(input: &str) -> String {
    // Mirrors `--auto` mode: semantic + cleanups + smartquotes, ellipses off.
    // NOTE(review): the flag names below follow the order implied by that
    // description and the sibling helpers — confirm against `fill_markdown`.
    let (semantic, cleanups, smartquotes, ellipses) = (true, true, true, false);
    fill_markdown(
        input,
        true,
        88,
        semantic,
        cleanups,
        smartquotes,
        ellipses,
        None,
        ListSpacing::Preserve,
    )
}

#[test]
fn test_d15_smart_quote_after_code_ending_with_word_char() {
    // Code span ending in a word character (`config`): Python turns the
    // following apostrophe into U+2019 (verified against Python v0.6.4).
    let result = fmt_auto("The `config`'s value is important.\n");
    let has_smart_apostrophe = result.contains("`\u{2019}s");
    assert!(
        has_smart_apostrophe,
        "D15/P9: Apostrophe after code ending with word char should be smart quote.\nGot:\n{result}"
    );
}

#[test]
fn test_d15_no_smart_quote_after_code_ending_with_non_word_char() {
    // Code span ending in a non-word character (`foo()`): Python leaves the
    // following apostrophe as ASCII 0x27 (verified against Python v0.6.4).
    let result = fmt_auto("Call `foo()`'s result.\n");
    let keeps_ascii_apostrophe = result.contains("`'s");
    assert!(
        keeps_ascii_apostrophe,
        "D15/P9: Apostrophe after code ending with non-word char should stay ASCII.\nGot:\n{result}"
    );
}

#[test]
fn test_d15_smart_quote_after_various_code_spans() {
    // One line mixing three code spans: two end in a word character and one
    // in `)`. Expectations were verified byte-by-byte against Python v0.6.4.
    let result = fmt_auto("Use `@react-spring/web`'s API and `x`'s type but `foo()`'s result.\n");

    // Ends with word char 'b' -> smart quote.
    let web_is_smart = result.contains("web`\u{2019}s");
    // Ends with word char 'x' -> smart quote.
    let x_is_smart = result.contains("x`\u{2019}s");
    // Ends with non-word char ')' -> apostrophe stays ASCII.
    let foo_is_ascii = result.contains("foo()`'s");

    assert!(
        web_is_smart,
        "D15/P9: `web` ends with word char, apostrophe should be smart quote.\nGot:\n{result}"
    );
    assert!(
        x_is_smart,
        "D15/P9: `x` ends with word char, apostrophe should be smart quote.\nGot:\n{result}"
    );
    assert!(
        foo_is_ascii,
        "D15/P9: `foo()` ends with non-word char, apostrophe should stay ASCII.\nGot:\n{result}"
    );
}

// =============================================================================
// Regression: Autolink false positive for relative paths (already fixed)
// =============================================================================

#[test]
fn test_relative_path_link_preserved() {
    // A link whose text equals its relative-path destination must keep the
    // explicit `[text](url)` form under semantic formatting.
    let result =
        fmt_semantic("See [docs/port-sync-playbook.md](docs/port-sync-playbook.md) for details.\n");
    let explicit_link_kept =
        result.contains("[docs/port-sync-playbook.md](docs/port-sync-playbook.md)");
    assert!(
        explicit_link_kept,
        "Relative path link where text==URL should be preserved as explicit link, got:\n{result}"
    );
}

#[test]
fn test_absolute_url_autolink_still_works() {
    let input = "Visit https://example.com for info.\n";
    let result = fmt(input);
    // Guard against a vacuous pass: the negative check below would also
    // succeed if the URL were dropped from the output entirely.
    assert!(
        result.contains("https://example.com"),
        "Absolute URL must survive formatting, got:\n{result}"
    );
    // Bare URL should remain as bare text (not wrapped in [text](url))
    assert!(
        !result.contains("[https://example.com](https://example.com)"),
        "Absolute URL autolink should render as bare text, got:\n{result}"
    );
}