mcp-confluence 1.1.0

use regex::Regex;

use crate::macros::process_confluence_macros;
use crate::types::AnyPage;

/// Strip HTML tags and return whitespace-normalised plain text.
///
/// Everything matching `<...>` is removed, the entities `&nbsp;`, `&lt;`,
/// `&gt;`, `&quot;`, `&#39;` and `&amp;` are decoded, and every run of
/// whitespace is collapsed to a single space (leading/trailing whitespace is
/// dropped).
///
/// `&amp;` is deliberately decoded last: decoding it first would turn escaped
/// text such as `&amp;lt;` into `&lt;` and then into `<`, i.e. decode it twice.
pub fn strip_html(html: &str) -> String {
    let tag_re = Regex::new(r"<[^>]*>").expect("hard-coded tag pattern is a valid regex");
    let without_tags = tag_re.replace_all(html, "");

    let decoded = without_tags
        .replace("&nbsp;", " ")
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#39;", "'")
        // Must stay last so already-escaped entities are not decoded twice.
        .replace("&amp;", "&");

    decoded.split_whitespace().collect::<Vec<_>>().join(" ")
}

/// Render Confluence storage HTML as Markdown-flavoured text.
///
/// The pipeline is: expand Confluence macros, run the regex-based
/// HTML → Markdown pass, collapse runs of three or more newlines into a single
/// blank line, trim the result, and finally shorten it when requested.
///
/// Truncation only happens when `truncate` is set, `max_length` is non-zero and
/// the rendered text is longer than `max_length` bytes. An empty `html` yields
/// an empty string without running any of the steps.
pub fn html_to_markdown(html: &str, truncate: bool, max_length: usize) -> String {
    if html.is_empty() {
        return String::new();
    }

    // Macros first, so the generic tag handling sees their expanded output.
    let expanded = process_confluence_macros(html);
    let converted = html_to_md_basic(&expanded);

    // Squash excess vertical whitespace and trim the edges.
    let blank_runs = Regex::new(r"\n{3,}").unwrap();
    let tidy = blank_runs.replace_all(&converted, "\n\n").trim().to_string();

    let over_budget = truncate && max_length > 0 && tidy.len() > max_length;
    if over_budget {
        smart_truncate(&tidy, max_length)
    } else {
        tidy
    }
}

/// Basic HTML → Markdown conversion via regex.
fn html_to_md_basic(html: &str) -> String {
    let mut s = html.to_string();

    // Headings
    for level in (1..=6).rev() {
        let hashes = "#".repeat(level);
        let open_re = Regex::new(&format!(r"(?i)<h{level}[^>]*>")).unwrap();
        let close_re = Regex::new(&format!(r"(?i)</h{level}>")).unwrap();
        s = open_re.replace_all(&s, &format!("\n{hashes} ")).to_string();
        s = close_re.replace_all(&s, "\n").to_string();
    }

    // Bold
    let strong_re = Regex::new(r"(?is)<strong>([\s\S]*?)</strong>").unwrap();
    s = strong_re.replace_all(&s, "**$1**").to_string();
    let b_re = Regex::new(r"(?is)<b>([\s\S]*?)</b>").unwrap();
    s = b_re.replace_all(&s, "**$1**").to_string();

    // Italic
    let em_re = Regex::new(r"(?is)<em>([\s\S]*?)</em>").unwrap();
    s = em_re.replace_all(&s, "*$1*").to_string();
    let i_re = Regex::new(r"(?is)<i>([\s\S]*?)</i>").unwrap();
    s = i_re.replace_all(&s, "*$1*").to_string();

    // Code (inline)
    let code_re = Regex::new(r"(?is)<code>([\s\S]*?)</code>").unwrap();
    s = code_re.replace_all(&s, "`$1`").to_string();

    // Pre blocks
    let pre_re = Regex::new(r"(?is)<pre[^>]*>([\s\S]*?)</pre>").unwrap();
    s = pre_re.replace_all(&s, "\n```\n$1\n```\n").to_string();

    // Links
    let a_re = Regex::new(r#"(?is)<a[^>]*href="([^"]*)"[^>]*>([\s\S]*?)</a>"#).unwrap();
    s = a_re.replace_all(&s, "[$2]($1)").to_string();

    // Images
    let img_re = Regex::new(r#"(?i)<img[^>]*alt="([^"]*)"[^>]*/?>"#).unwrap();
    s = img_re.replace_all(&s, "[Image: $1]").to_string();
    let img_re2 = Regex::new(r#"(?i)<img[^>]*src="([^"]*)"[^>]*/?>"#).unwrap();
    s = img_re2.replace_all(&s, "[Image: $1]").to_string();

    // Table cells – produce pipe-delimited format
    let th_re = Regex::new(r"(?is)<th[^>]*>([\s\S]*?)</th>").unwrap();
    s = th_re.replace_all(&s, "| **$1** ").to_string();
    let td_re = Regex::new(r"(?is)<td[^>]*>([\s\S]*?)</td>").unwrap();
    s = td_re.replace_all(&s, "| $1 ").to_string();
    let tr_re = Regex::new(r"(?is)<tr[^>]*>([\s\S]*?)</tr>").unwrap();
    s = tr_re.replace_all(&s, "$1|\n").to_string();

    // Lists
    let li_re = Regex::new(r"(?is)<li[^>]*>([\s\S]*?)</li>").unwrap();
    s = li_re.replace_all(&s, "- $1\n").to_string();

    // Paragraphs / breaks
    let p_re = Regex::new(r"(?i)<p[^>]*>").unwrap();
    s = p_re.replace_all(&s, "\n").to_string();
    let p_close = Regex::new(r"(?i)</p>").unwrap();
    s = p_close.replace_all(&s, "\n").to_string();
    let br_re = Regex::new(r"(?i)<br\s*/?\s*>").unwrap();
    s = br_re.replace_all(&s, "\n").to_string();
    let hr_re = Regex::new(r"(?i)<hr\s*/?\s*>").unwrap();
    s = hr_re.replace_all(&s, "\n---\n").to_string();

    // Strip remaining tags
    let tag_re = Regex::new(r"<[^>]*>").unwrap();
    s = tag_re.replace_all(&s, "").to_string();

    // Decode entities
    s = s
        .replace("&nbsp;", " ")
        .replace("&amp;", "&")
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#39;", "'");

    s
}

/// Truncate `text` to roughly `max_length` bytes, preferring a natural boundary.
///
/// If `text` already fits (its byte length is `<= max_length`) it is returned
/// unchanged. Otherwise the cut point is chosen, in order of preference, from
/// the text before the limit:
///
/// 1. the last paragraph break (`"\n\n"`) beyond 70% of `max_length`,
/// 2. the last sentence end (`". "`, `"! "`, `"? "`) beyond 70% of `max_length`,
/// 3. the last space beyond 80% of `max_length`,
/// 4. the limit itself.
///
/// The kept prefix is trimmed and followed by a notice stating approximately
/// how much content (in thousands of bytes) was dropped.
///
/// `max_length` is a byte offset; when it falls inside a multi-byte UTF-8
/// character the limit is moved back to the start of that character, so this
/// function never panics on non-ASCII input.
pub fn smart_truncate(text: &str, max_length: usize) -> String {
    if text.len() <= max_length {
        return text.to_string();
    }

    // Slicing a `str` at a non-boundary offset panics, so back off to the
    // nearest preceding character boundary (offset 0 is always a boundary,
    // which guarantees termination).
    let mut limit = max_length;
    while !text.is_char_boundary(limit) {
        limit -= 1;
    }
    let window = &text[..limit];

    let threshold_70 = (max_length as f64 * 0.7) as usize;
    let threshold_80 = (max_length as f64 * 0.8) as usize;

    // Candidates are only accepted when they keep most of the budget;
    // otherwise the next, finer-grained boundary is tried.
    let paragraph_break = window.rfind("\n\n").filter(|&pos| pos > threshold_70);
    let sentence_break = || {
        [". ", "! ", "? "]
            .iter()
            // `+ 1` keeps the punctuation mark and drops the following space.
            .filter_map(|sep| window.rfind(*sep).map(|pos| pos + 1))
            .max()
            .filter(|&pos| pos > threshold_70)
    };
    let word_break = || window.rfind(' ').filter(|&pos| pos > threshold_80);

    let truncate_at = paragraph_break
        .or_else(sentence_break)
        .or_else(word_break)
        .unwrap_or(limit);

    let truncated = text[..truncate_at].trim();
    let remaining = text.len() - truncate_at;
    let remaining_k = (remaining as f64 / 1000.0).round() as usize;

    format!(
        "{truncated}\n\n---\n⚠️ **Content truncated** (~{remaining_k}k characters remaining)\n\n\
         Use `read_page_outline` to see the page structure, then `read_page_section` to read specific sections."
    )
}

/// Render a space as a short Markdown summary.
///
/// The first line is `**KEY**: name`, followed by bullet lines for the type and
/// status. A description bullet is appended only when a plain-text description
/// is present and non-empty.
pub fn format_space(space: &crate::types::ConfluenceSpace) -> String {
    let mut summary = format!(
        "**{}**: {}\n- **Type**: {}\n- **Status**: {}",
        space.key, space.name, space.space_type, space.status
    );

    let description = space
        .description
        .as_ref()
        .and_then(|d| d.plain.as_ref())
        .map(|p| &p.value)
        .filter(|text| !text.is_empty());

    if let Some(text) = description {
        summary.push_str(&format!("\n- **Description**: {text}"));
    }

    summary
}

/// Render a compact Markdown summary of a page (used for listings and search results).
///
/// Output is a title/ID line followed by bullets for status, space (when
/// known), version number and last-modified date (when known).
pub fn format_page(page: &AnyPage) -> String {
    let mut out = Vec::with_capacity(5);

    out.push(format!("**{}** (ID: {})", page.title(), page.id()));
    out.push(format!("- **Status**: {}", page.status()));

    // Optional bullets are simply skipped when the value is absent.
    out.extend(
        page.space_info()
            .map(|(key, name)| format!("- **Space**: {name} ({key})")),
    );

    out.push(format!("- **Version**: {}", page.version_number()));

    out.extend(
        page.version_date()
            .map(|date| format!("- **Last Modified**: {date}")),
    );

    out.join("\n")
}

/// Render a page as a full Markdown document: heading, metadata table, body and URL.
///
/// - The metadata table always lists ID, status and version; space, space ID,
///   last-modified date, author and parent path rows appear only when the page
///   provides them.
/// - A `## Content` section is added when the storage body is non-empty; the
///   body is converted with `html_to_markdown` and truncated to
///   `max_content_length`.
/// - When the page has a web UI link, a `**URL**` footer is added. Links
///   starting with `/` are prefixed with `host` (plus `/wiki` when `is_cloud`
///   is set); any other link is emitted as-is.
pub fn format_page_detailed(page: &AnyPage, host: &str, is_cloud: bool, max_content_length: usize) -> String {
    // One table row: bold field name on the left, value on the right.
    let row = |label: &str, value: &dyn std::fmt::Display| format!("| **{label}** | {value} |");

    let heading = format!("# {}", page.title());

    // Metadata table
    let mut table = vec![
        "| Field | Value |".to_string(),
        "|-------|-------|".to_string(),
        row("ID", &page.id()),
        row("Status", &page.status()),
    ];

    if let Some((key, name)) = page.space_info() {
        table.push(row("Space", &format!("{name} ({key})")));
    }
    if let Some(sid) = page.space_id() {
        table.push(row("Space ID", &sid));
    }

    table.push(row("Version", &page.version_number()));

    if let Some(date) = page.version_date() {
        table.push(row("Last Modified", &date));
    }
    if let Some(author) = page.version_author() {
        table.push(row("Modified By", &author));
    }
    if let Some(ancs) = page.ancestors() {
        if !ancs.is_empty() {
            let path = ancs
                .iter()
                .map(|a| a.title.as_str())
                .collect::<Vec<&str>>()
                .join(" > ");
            table.push(row("Parent Path", &path));
        }
    }

    let mut doc: Vec<String> = vec![heading, String::new(), table.join("\n"), String::new()];

    // Body content
    let storage = page.storage_value();
    if !storage.is_empty() {
        doc.push("## Content".to_string());
        doc.push(String::new());
        doc.push(html_to_markdown(storage, true, max_content_length));
    }

    // URL footer
    if let Some(webui) = page.webui_link() {
        let full_url = match (webui.starts_with('/'), is_cloud) {
            (true, true) => format!("{host}/wiki{webui}"),
            (true, false) => format!("{host}{webui}"),
            (false, _) => webui.to_string(),
        };
        doc.push(String::new());
        doc.push("---".to_string());
        doc.push(format!("**URL**: {full_url}"));
    }

    doc.join("\n")
}

/// Render a byte count as a human-readable size with two decimals.
///
/// Uses 1024-based units `Bytes`, `KB`, `MB` and `GB`; anything larger is
/// still expressed in `GB`. Zero is special-cased as `"0 Bytes"`.
pub fn format_bytes(bytes: u64) -> String {
    const UNITS: [&str; 4] = ["Bytes", "KB", "MB", "GB"];
    const BASE: f64 = 1024.0;

    if bytes == 0 {
        return "0 Bytes".to_string();
    }

    let magnitude = bytes as f64;
    // log base 1024 selects the unit; clamp so huge values stay in the last unit.
    let exponent = ((magnitude.ln() / BASE.ln()).floor() as usize).min(UNITS.len() - 1);
    let scaled = magnitude / BASE.powi(exponent as i32);

    format!("{scaled:.2} {}", UNITS[exponent])
}

// Unit tests for the formatting helpers in this module. Fixtures for the
// `format_*` tests are built directly from the structs in `crate::types`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::*;

    // ── strip_html ──

    #[test]
    fn strip_html_basic() {
        assert_eq!(strip_html("<p>Hello <strong>world</strong></p>"), "Hello world");
    }

    #[test]
    fn strip_html_empty() {
        assert_eq!(strip_html(""), "");
    }

    #[test]
    fn strip_html_entities() {
        assert_eq!(strip_html("A &amp; B &lt; C"), "A & B < C");
    }

    #[test]
    fn strip_html_collapses_whitespace() {
        assert_eq!(strip_html("<p>  lots   of   spaces  </p>"), "lots of spaces");
    }

    #[test]
    fn strip_html_nested_tags() {
        assert_eq!(
            strip_html("<div><ul><li>Item</li></ul></div>"),
            "Item"
        );
    }

    // ── html_to_markdown ──

    #[test]
    fn html_to_markdown_empty() {
        assert_eq!(html_to_markdown("", false, 0), "");
    }

    #[test]
    fn html_to_markdown_headings() {
        let html = "<h1>Title</h1><h2>Subtitle</h2>";
        let md = html_to_markdown(html, false, 0);
        assert!(md.contains("# Title"));
        assert!(md.contains("## Subtitle"));
    }

    #[test]
    fn html_to_markdown_bold_and_italic() {
        let html = "<strong>bold</strong> and <em>italic</em>";
        let md = html_to_markdown(html, false, 0);
        assert!(md.contains("**bold**"));
        assert!(md.contains("*italic*"));
    }

    #[test]
    fn html_to_markdown_inline_code() {
        let html = "<code>foo()</code>";
        let md = html_to_markdown(html, false, 0);
        assert!(md.contains("`foo()`"));
    }

    #[test]
    fn html_to_markdown_link() {
        let html = r#"<a href="https://example.com">click</a>"#;
        let md = html_to_markdown(html, false, 0);
        assert!(md.contains("[click](https://example.com)"));
    }

    // Only checks that the header cell is bolded and that pipes are emitted;
    // the exact table layout is not asserted.
    #[test]
    fn html_to_markdown_table() {
        let html = "<table><tr><th>Name</th><th>Age</th></tr><tr><td>Alice</td><td>30</td></tr></table>";
        let md = html_to_markdown(html, false, 0);
        assert!(md.contains("**Name**"));
        assert!(md.contains("Alice"));
        assert!(md.contains("|"));
    }

    #[test]
    fn html_to_markdown_list() {
        let html = "<ul><li>One</li><li>Two</li></ul>";
        let md = html_to_markdown(html, false, 0);
        assert!(md.contains("- One"));
        assert!(md.contains("- Two"));
    }

    #[test]
    fn html_to_markdown_hr() {
        let html = "<p>Before</p><hr/><p>After</p>";
        let md = html_to_markdown(html, false, 0);
        assert!(md.contains("---"));
    }

    // The rendered text is far longer than the 100-byte budget, so the
    // truncation notice must be present.
    #[test]
    fn html_to_markdown_with_truncation() {
        let html = "<p>A</p>".repeat(1000);
        let md = html_to_markdown(&html, true, 100);
        assert!(md.len() < 300); // truncated + message
        assert!(md.contains("truncated"));
    }

    #[test]
    fn html_to_markdown_no_truncation_when_short() {
        let html = "<p>Short</p>";
        let md = html_to_markdown(html, true, 10000);
        assert!(!md.contains("truncated"));
    }

    #[test]
    fn html_to_markdown_pre_block() {
        let html = "<pre>code block</pre>";
        let md = html_to_markdown(html, false, 0);
        assert!(md.contains("```"));
        assert!(md.contains("code block"));
    }

    // ── smart_truncate ──

    #[test]
    fn smart_truncate_short_text() {
        let text = "Hello world";
        assert_eq!(smart_truncate(text, 100), text);
    }

    #[test]
    fn smart_truncate_at_paragraph() {
        let text = "First paragraph.\n\nSecond paragraph that makes it longer.";
        let result = smart_truncate(text, 30);
        assert!(result.contains("First paragraph."));
        assert!(result.contains("truncated"));
    }

    #[test]
    fn smart_truncate_at_sentence() {
        let text = "First sentence. Second sentence that pushes past the limit here.";
        let result = smart_truncate(text, 50);
        assert!(result.contains("truncated"));
    }

    #[test]
    fn smart_truncate_at_word() {
        let text = "word1 word2 word3 word4 word5 word6 word7 word8 word9 word10";
        let result = smart_truncate(text, 30);
        assert!(result.contains("truncated"));
        // Should break at a word boundary, not mid-word
        assert!(result.starts_with("word1 word2 word3 word4 word5"));
    }

    // ── format_bytes ──

    #[test]
    fn format_bytes_zero() {
        assert_eq!(format_bytes(0), "0 Bytes");
    }

    #[test]
    fn format_bytes_bytes() {
        assert_eq!(format_bytes(500), "500.00 Bytes");
    }

    #[test]
    fn format_bytes_kb() {
        assert_eq!(format_bytes(1024), "1.00 KB");
    }

    #[test]
    fn format_bytes_mb() {
        assert_eq!(format_bytes(1024 * 1024), "1.00 MB");
    }

    #[test]
    fn format_bytes_gb() {
        assert_eq!(format_bytes(1024 * 1024 * 1024), "1.00 GB");
    }

    #[test]
    fn format_bytes_fractional() {
        assert_eq!(format_bytes(1536), "1.50 KB");
    }

    // ── format_space ──

    #[test]
    fn format_space_basic() {
        let space = ConfluenceSpace {
            id: "1".to_string(),
            key: "DEV".to_string(),
            name: "Development".to_string(),
            space_type: "global".to_string(),
            status: "current".to_string(),
            description: None,
            links: None,
        };
        let result = format_space(&space);
        assert!(result.contains("**DEV**: Development"));
        assert!(result.contains("global"));
        assert!(result.contains("current"));
    }

    #[test]
    fn format_space_with_description() {
        let space = ConfluenceSpace {
            id: "1".to_string(),
            key: "QA".to_string(),
            name: "Quality Assurance".to_string(),
            space_type: "global".to_string(),
            status: "current".to_string(),
            description: Some(SpaceDescription {
                plain: Some(PlainValue {
                    value: "QA team space".to_string(),
                }),
            }),
            links: None,
        };
        let result = format_space(&space);
        assert!(result.contains("QA team space"));
    }

    // ── format_page ──

    // Page built from the `ConfluencePageV1` shape, wrapped in `AnyPage::V1`.
    #[test]
    fn format_page_v1() {
        let page = ConfluencePageV1 {
            id: "42".to_string(),
            page_type: "page".to_string(),
            status: "current".to_string(),
            title: "Test Page".to_string(),
            space: Some(SpaceRef {
                key: "TS".to_string(),
                name: "Test Space".to_string(),
            }),
            version: Some(PageVersionV1 {
                number: 5,
                when: Some("2024-01-01".to_string()),
                by: None,
            }),
            body: None,
            ancestors: None,
            links: None,
        };
        let any = AnyPage::V1(page);
        let result = format_page(&any);
        assert!(result.contains("**Test Page** (ID: 42)"));
        assert!(result.contains("Test Space (TS)"));
        assert!(result.contains("Version**: 5"));
        assert!(result.contains("2024-01-01"));
    }

    // Page built from the `ConfluencePage` shape, wrapped in `AnyPage::V2`.
    #[test]
    fn format_page_v2() {
        let page = ConfluencePage {
            id: "99".to_string(),
            status: "current".to_string(),
            title: "Cloud Page".to_string(),
            space_id: Some("space-1".to_string()),
            parent_id: None,
            version: Some(PageVersion {
                number: 3,
                message: None,
                created_at: Some("2024-06-15".to_string()),
            }),
            body: None,
            links: None,
        };
        let any = AnyPage::V2(page);
        let result = format_page(&any);
        assert!(result.contains("**Cloud Page** (ID: 99)"));
        assert!(result.contains("Version**: 3"));
    }

    // ── format_page_detailed ──

    // `is_cloud = false`: the relative web UI link is appended to the host
    // without a `/wiki` prefix.
    #[test]
    fn format_page_detailed_with_content() {
        let page = ConfluencePageV1 {
            id: "10".to_string(),
            page_type: "page".to_string(),
            status: "current".to_string(),
            title: "Detailed Page".to_string(),
            space: Some(SpaceRef {
                key: "DS".to_string(),
                name: "Detail Space".to_string(),
            }),
            version: Some(PageVersionV1 {
                number: 2,
                when: None,
                by: Some(VersionAuthor {
                    display_name: "Jane Doe".to_string(),
                }),
            }),
            body: Some(PageBody {
                storage: Some(StorageBody {
                    value: "<p>Hello world</p>".to_string(),
                    representation: Some("storage".to_string()),
                }),
                view: None,
            }),
            ancestors: Some(vec![AncestorRef {
                id: "1".to_string(),
                title: "Root".to_string(),
            }]),
            links: Some(PageLinksV1 {
                webui: Some("/pages/10".to_string()),
                edit: None,
                self_link: None,
            }),
        };
        let any = AnyPage::V1(page);
        let result = format_page_detailed(&any, "https://wiki.example.com", false, 30000);
        assert!(result.contains("# Detailed Page"));
        assert!(result.contains("Hello world"));
        assert!(result.contains("Jane Doe"));
        assert!(result.contains("Root"));
        assert!(result.contains("https://wiki.example.com/pages/10"));
    }

    // `is_cloud = true`: the relative web UI link gets the `/wiki` prefix.
    #[test]
    fn format_page_detailed_cloud_url() {
        let page = ConfluencePage {
            id: "20".to_string(),
            status: "current".to_string(),
            title: "Cloud Detailed".to_string(),
            space_id: Some("sp-1".to_string()),
            parent_id: None,
            version: Some(PageVersion {
                number: 1,
                message: None,
                created_at: None,
            }),
            body: None,
            links: Some(PageLinks {
                webui: Some("/pages/20".to_string()),
                editui: None,
            }),
        };
        let any = AnyPage::V2(page);
        let result = format_page_detailed(&any, "https://mysite.atlassian.net", true, 30000);
        assert!(result.contains("https://mysite.atlassian.net/wiki/pages/20"));
    }
}