// anytomd 1.3.0 - Docs.rs

//! PPTX (Office Open XML Presentation) to Markdown converter.
//!
//! Parses PPTX files directly from their OOXML ZIP structure. Extracts slide titles,
//! body text, tables, speaker notes, embedded images, and content from group shapes
//! (`<p:grpSp>`). Each slide becomes a `## Slide N: Title` section separated by
//! horizontal rules.

use std::collections::HashMap;
use std::io::Cursor;

use quick_xml::Reader;
use quick_xml::events::Event;
use zip::ZipArchive;

use crate::converter::comments::{self, Comment};
use crate::converter::ooxml_utils::{
    ImageInfo, PendingImageResolution, Relationship, attr_value_unescaped, derive_rels_path,
    parse_relationships, resolve_image_placeholders, resolve_relative_to_file,
};
use crate::converter::{
    ConversionOptions, ConversionResult, ConversionWarning, Converter, WarningCode,
};
use crate::error::ConvertError;
use crate::markdown::{build_table, build_table_plain};
use crate::zip_utils::{read_zip_bytes, read_zip_text, read_zip_text_lossy};

/// Converts PPTX files to Markdown.
///
/// This is a stateless unit struct; it carries no configuration of its own.
pub struct PptxConverter;

// ---- Data types ----

/// Information about a slide in presentation order.
#[derive(Debug, Clone)]
struct SlideInfo {
    /// 1-based position of the slide's entry in `<p:sldIdLst>`.
    number: usize,
    /// Path of the slide part derived from its relationship target,
    /// e.g. `ppt/slides/slide1.xml`.
    path: String,
}

/// The type of placeholder in a shape.
///
/// Derived from the `type` attribute of a `<p:ph>` element.
#[derive(Debug, Clone, PartialEq)]
enum PlaceholderType {
    /// `type="title"`.
    Title,
    /// `type="ctrTitle"`.
    CenterTitle,
    /// `type="subTitle"`.
    SubTitle,
    /// `type="body"`.
    Body,
    /// Any other `type` value, or a `<p:ph>` without a `type` attribute.
    Other,
}

/// Content extracted from a single shape on a slide.
#[derive(Debug, Clone)]
enum ShapeContent {
    /// Text of a title or centered-title placeholder; rendered in the slide heading.
    Title(String),
    /// Text of any other text-bearing shape, with paragraphs joined by newlines.
    Body(String),
    /// A table found inside a graphic frame.
    Table {
        /// Cells of the first table row, used as the header row.
        headers: Vec<String>,
        /// Cells of every row after the first.
        rows: Vec<Vec<String>>,
    },
    /// A picture shape that references an embedded image.
    Image {
        /// Relationship ID taken from the `<a:blip>` embed attribute.
        rel_id: String,
        /// Non-empty `descr` attribute of the picture's `<p:cNvPr>`, if any.
        alt_text: Option<String>,
    },
}

// ---- Slide order resolution ----

/// Parse presentation.xml and its rels to determine slide order.
///
/// Collects the relationship ID of every `<p:sldId>` element (any attribute
/// whose qualified name ends in `:id`, normally `r:id`) in document order and
/// resolves each one through `pres_rels`.
///
/// Target resolution:
/// - a target starting with `/` is absolute within the package, so only the
///   leading slash is dropped (`/ppt/slides/slide1.xml` -> `ppt/slides/slide1.xml`);
/// - a target already starting with `ppt/` is used as-is;
/// - anything else is taken relative to the `ppt/` directory.
///
/// Returns slides in presentation order (as defined by `<p:sldIdLst>`).
/// Entries whose relationship ID is unknown are skipped, but slide numbers
/// still reflect the position in the ID list. Parsing stops silently at the
/// first XML error, keeping the IDs collected so far.
fn resolve_slide_order(
    pres_xml: &str,
    pres_rels: &HashMap<String, Relationship>,
) -> Vec<SlideInfo> {
    let mut reader = Reader::from_str(pres_xml);
    let mut rel_ids: Vec<String> = Vec::new();

    loop {
        match reader.read_event() {
            Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
                let local = e.local_name();
                let local_str = std::str::from_utf8(local.as_ref()).unwrap_or("");
                if local_str != "sldId" {
                    continue;
                }

                for attr in e.attributes().flatten() {
                    let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or("");
                    // The plain `id` attribute is the slide ID, not the relationship;
                    // only a prefixed `*:id` (normally `r:id`) is wanted.
                    if key.ends_with(":id") {
                        rel_ids.push(String::from_utf8_lossy(&attr.value).into_owned());
                    }
                }
            }
            Ok(Event::Eof) | Err(_) => break,
            _ => {}
        }
    }

    let mut slides = Vec::with_capacity(rel_ids.len());
    for (i, rid) in rel_ids.iter().enumerate() {
        let Some(rel) = pres_rels.get(rid) else {
            continue;
        };

        let path = if let Some(absolute) = rel.target.strip_prefix('/') {
            // Absolute part name: relative to the package root, and ZIP entry
            // names carry no leading slash.
            absolute.to_string()
        } else if rel.target.starts_with("ppt/") {
            rel.target.clone()
        } else {
            // Target is relative to ppt/ directory, e.g., "slides/slide1.xml"
            format!("ppt/{}", rel.target)
        };

        slides.push(SlideInfo {
            number: i + 1,
            path,
        });
    }

    slides
}

// ---- Slide content parsing ----

/// Parse a slide XML and extract shape contents in document order.
///
/// The slide is read as a stream of XML events. At most one top-level shape
/// kind is "open" at a time, and everything nested inside it is routed to the
/// matching handler:
///
/// - `<p:sp>`: paragraphs are collected and turned into a title or body by
///   `finalize_shape` when the shape closes.
/// - `<p:graphicFrame>`: a `<a:tbl>` inside becomes a `ShapeContent::Table`
///   whose first row is the header row.
/// - `<p:pic>`: becomes a `ShapeContent::Image` if an embed relationship ID
///   was found on a `<a:blip>`.
///
/// `<p:grpSp>` is a transparent container: its children are processed as if
/// they were top-level shapes.
///
/// Line breaks: in DrawingML `<a:br>` is a child of `<a:p>` (a sibling of
/// `<a:r>`), and may be self-closing or carry an `<a:rPr>` child. Both forms
/// are handled here at paragraph level: a break adds `'\n'` to shape text and
/// a space to table-cell text. A break seen while a run is open is left to the
/// `handle_*_empty` helpers so it is never counted twice.
///
/// Returns (shapes, warnings). An XML error stops parsing, keeps the shapes
/// completed so far and records a `MalformedSegment` warning.
fn parse_slide(xml: &str) -> (Vec<ShapeContent>, Vec<ConversionWarning>) {
    let mut reader = Reader::from_str(xml);
    let mut shapes: Vec<ShapeContent> = Vec::new();
    let mut warnings: Vec<ConversionWarning> = Vec::new();

    // Shape-level state
    let mut in_shape = false; // inside <p:sp>
    let mut in_graphic_frame = false; // inside <p:graphicFrame>
    let mut in_picture = false; // inside <p:pic>
    let mut placeholder_type: Option<PlaceholderType> = None;

    // Text body state
    let mut in_text_body = false;
    let mut in_paragraph = false;
    let mut in_run = false;
    let mut in_text = false;
    let mut current_paragraph = String::new();
    let mut shape_paragraphs: Vec<String> = Vec::new();

    // Table state
    let mut in_table = false;
    let mut in_table_row = false;
    let mut in_table_cell = false;
    let mut table_rows: Vec<Vec<String>> = Vec::new();
    let mut current_row: Vec<String> = Vec::new();
    let mut current_cell = String::new();
    // Track text state within table cells
    let mut in_cell_paragraph = false;
    let mut in_cell_run = false;
    let mut in_cell_text = false;

    // Image state
    let mut current_blip_rel_id: Option<String> = None;
    let mut current_image_alt: Option<String> = None;

    // Nesting depth inside the currently open top-level shape. The shape is
    // finished when its counter returns to zero. Empty (self-closing) elements
    // have no End event and therefore never touch these counters.
    let mut shape_depth: u32 = 0;
    let mut graphic_frame_depth: u32 = 0;
    let mut picture_depth: u32 = 0;

    // Group shape depth: <p:grpSp> is a transparent container — child shapes
    // (sp, graphicFrame, pic) are processed normally. The counter tracks nesting
    // for proper End-tag matching but does not gate any logic.
    let mut group_depth: u32 = 0;

    loop {
        match reader.read_event() {
            Ok(Event::Start(ref e)) => {
                let local = e.local_name();
                let local_str = std::str::from_utf8(local.as_ref()).unwrap_or("");

                match local_str {
                    "grpSp" if !in_shape && !in_graphic_frame && !in_picture => {
                        group_depth += 1;
                    }
                    "sp" if !in_shape && !in_graphic_frame && !in_picture => {
                        in_shape = true;
                        shape_depth = 1;
                        placeholder_type = None;
                        shape_paragraphs.clear();
                    }
                    "graphicFrame" if !in_shape && !in_graphic_frame && !in_picture => {
                        in_graphic_frame = true;
                        graphic_frame_depth = 1;
                    }
                    "pic" if !in_shape && !in_graphic_frame && !in_picture => {
                        in_picture = true;
                        picture_depth = 1;
                        current_blip_rel_id = None;
                        current_image_alt = None;
                    }
                    _ if in_shape => {
                        shape_depth += 1;
                        // `<a:br><a:rPr .../></a:br>` arrives as a Start event.
                        if local_str == "br" && in_paragraph && !in_run {
                            current_paragraph.push('\n');
                        }
                        handle_shape_start(
                            local_str,
                            e,
                            &mut placeholder_type,
                            &mut in_text_body,
                            &mut in_paragraph,
                            &mut in_run,
                            &mut in_text,
                            &mut current_paragraph,
                        );
                    }
                    _ if in_graphic_frame => {
                        graphic_frame_depth += 1;
                        // Same as above, but cell text stays on a single line.
                        if local_str == "br" && in_cell_paragraph && !in_cell_run {
                            current_cell.push(' ');
                        }
                        handle_graphic_frame_start(
                            local_str,
                            &mut in_table,
                            &mut in_table_row,
                            &mut in_table_cell,
                            &mut in_cell_paragraph,
                            &mut in_cell_run,
                            &mut in_cell_text,
                            &mut current_cell,
                            &mut current_row,
                            &mut table_rows,
                        );
                    }
                    _ if in_picture => {
                        picture_depth += 1;
                        handle_picture_start(
                            local_str,
                            e,
                            &mut current_blip_rel_id,
                            &mut current_image_alt,
                        );
                    }
                    _ => {}
                }
            }
            Ok(Event::Empty(ref e)) => {
                let local = e.local_name();
                let local_str = std::str::from_utf8(local.as_ref()).unwrap_or("");

                if in_shape {
                    // Paragraph-level `<a:br/>` between runs. The `!in_run` guard
                    // keeps this disjoint from the helper's own `br` handling.
                    if local_str == "br" && in_paragraph && !in_run {
                        current_paragraph.push('\n');
                    }
                    handle_shape_empty(
                        local_str,
                        e,
                        &mut placeholder_type,
                        in_run,
                        &mut current_paragraph,
                    );
                } else if in_graphic_frame {
                    if local_str == "br" && in_cell_paragraph && !in_cell_run {
                        current_cell.push(' ');
                    }
                    handle_graphic_frame_empty(local_str, in_cell_run, &mut current_cell);
                } else if in_picture {
                    handle_picture_start(
                        local_str,
                        e,
                        &mut current_blip_rel_id,
                        &mut current_image_alt,
                    );
                }
            }
            Ok(Event::Text(ref e)) => {
                // Only text inside `<a:r><a:t>` counts; unescape failures yield
                // an empty string rather than aborting the slide.
                if in_shape && in_text && in_run {
                    current_paragraph.push_str(&e.unescape().unwrap_or_default());
                } else if in_graphic_frame && in_cell_text && in_cell_run {
                    current_cell.push_str(&e.unescape().unwrap_or_default());
                }
            }
            Ok(Event::End(ref e)) => {
                let local = e.local_name();
                let local_str = std::str::from_utf8(local.as_ref()).unwrap_or("");

                if in_shape {
                    shape_depth -= 1;

                    match local_str {
                        "t" => in_text = false,
                        "r" => {
                            in_run = false;
                            in_text = false;
                        }
                        "p" if in_paragraph => {
                            // Move the text out and leave the buffer empty.
                            let para = std::mem::take(&mut current_paragraph);
                            if !para.is_empty() {
                                shape_paragraphs.push(para);
                            }
                            in_paragraph = false;
                        }
                        "txBody" => in_text_body = false,
                        _ => {}
                    }

                    if shape_depth == 0 {
                        // Finalize shape
                        if let Some(c) = finalize_shape(&placeholder_type, &shape_paragraphs) {
                            shapes.push(c);
                        }
                        in_shape = false;
                        placeholder_type = None;
                        shape_paragraphs.clear();
                        in_text_body = false;
                        in_paragraph = false;
                        in_run = false;
                        in_text = false;
                    }
                } else if in_graphic_frame {
                    graphic_frame_depth -= 1;

                    match local_str {
                        "t" if in_table_cell => in_cell_text = false,
                        "r" if in_table_cell => {
                            in_cell_run = false;
                            in_cell_text = false;
                        }
                        "p" if in_cell_paragraph => {
                            in_cell_paragraph = false;
                        }
                        "tc" if in_table_cell => {
                            current_row.push(current_cell.trim().to_string());
                            current_cell.clear();
                            in_table_cell = false;
                            in_cell_paragraph = false;
                            in_cell_run = false;
                            in_cell_text = false;
                        }
                        "tr" if in_table_row => {
                            table_rows.push(std::mem::take(&mut current_row));
                            in_table_row = false;
                        }
                        "tbl" if in_table => {
                            // Finalize table: the first row becomes the header row,
                            // the rest are data rows. A table with no rows is dropped.
                            let mut rows = std::mem::take(&mut table_rows).into_iter();
                            if let Some(headers) = rows.next() {
                                shapes.push(ShapeContent::Table {
                                    headers,
                                    rows: rows.collect(),
                                });
                            }
                            in_table = false;
                        }
                        _ => {}
                    }

                    if graphic_frame_depth == 0 {
                        in_graphic_frame = false;
                        in_table = false;
                        in_table_row = false;
                        in_table_cell = false;
                        in_cell_paragraph = false;
                        in_cell_run = false;
                        in_cell_text = false;
                    }
                } else if in_picture {
                    picture_depth -= 1;

                    if picture_depth == 0 {
                        // A picture without an embedded blip produces no output.
                        if let Some(rel_id) = current_blip_rel_id.take() {
                            shapes.push(ShapeContent::Image {
                                rel_id,
                                alt_text: current_image_alt.take(),
                            });
                        }
                        in_picture = false;
                        current_image_alt = None;
                    }
                } else if local_str == "grpSp" && group_depth > 0 {
                    group_depth -= 1;
                }
            }
            Ok(Event::Eof) => break,
            Err(err) => {
                warnings.push(ConversionWarning {
                    code: WarningCode::MalformedSegment,
                    message: format!("XML parse error in slide: {err}"),
                    location: None,
                });
                break;
            }
            _ => {}
        }
    }

    (shapes, warnings)
}

/// Read the placeholder type from a `<p:ph>` element.
///
/// Looks at the `type` attribute (matched by local name). Recognised values
/// are `title`, `ctrTitle`, `subTitle` and `body`; any other value, or a
/// missing attribute, yields `PlaceholderType::Other`.
fn placeholder_type_of(e: &quick_xml::events::BytesStart) -> PlaceholderType {
    let mut ph_type = PlaceholderType::Other;
    for attr in e.attributes().flatten() {
        let local_name = attr.key.local_name();
        let key = std::str::from_utf8(local_name.as_ref()).unwrap_or("");
        if key != "type" {
            continue;
        }
        let val = String::from_utf8_lossy(&attr.value);
        ph_type = match val.as_ref() {
            "title" => PlaceholderType::Title,
            "ctrTitle" => PlaceholderType::CenterTitle,
            "subTitle" => PlaceholderType::SubTitle,
            "body" => PlaceholderType::Body,
            _ => PlaceholderType::Other,
        };
    }
    ph_type
}

/// Handle a Start event inside a <p:sp> shape.
///
/// Records the placeholder type for `<p:ph>` and advances the nested text
/// state `txBody` -> `p` -> `r` -> `t`. Each level is only entered while its
/// parent level is active, and opening a paragraph clears `current_paragraph`.
/// All other elements are ignored.
#[allow(clippy::too_many_arguments)]
fn handle_shape_start(
    local_str: &str,
    e: &quick_xml::events::BytesStart,
    placeholder_type: &mut Option<PlaceholderType>,
    in_text_body: &mut bool,
    in_paragraph: &mut bool,
    in_run: &mut bool,
    in_text: &mut bool,
    current_paragraph: &mut String,
) {
    match local_str {
        // <p:ph type="title"> or <p:ph type="ctrTitle"> etc.
        "ph" => *placeholder_type = Some(placeholder_type_of(e)),
        "txBody" => *in_text_body = true,
        "p" if *in_text_body => {
            *in_paragraph = true;
            current_paragraph.clear();
        }
        "r" if *in_paragraph => *in_run = true,
        "t" if *in_run => *in_text = true,
        _ => {}
    }
}

/// Handle an Empty event inside a <p:sp> shape.
///
/// A self-closing `<p:ph .../>` records the placeholder type. A `<a:br/>`
/// appends a newline to `current_paragraph`, but only while `in_run` is true.
/// All other elements are ignored.
///
/// NOTE(review): DrawingML places `<a:br>` between runs rather than inside
/// them, so the `br` arm may rarely fire on real files — confirm.
fn handle_shape_empty(
    local_str: &str,
    e: &quick_xml::events::BytesStart,
    placeholder_type: &mut Option<PlaceholderType>,
    in_run: bool,
    current_paragraph: &mut String,
) {
    match local_str {
        "ph" => *placeholder_type = Some(placeholder_type_of(e)),
        "br" if in_run => current_paragraph.push('\n'),
        _ => {}
    }
}

/// Update table-parsing state for an element opened inside a <p:graphicFrame>.
///
/// The levels `tbl` -> `tr` -> `tc` -> `p` -> `r` -> `t` are entered strictly
/// top-down: each tag only takes effect while its parent level is active.
/// Entering a table, row or cell clears the matching accumulator. Elements
/// that do not fit the current state are ignored.
#[allow(clippy::too_many_arguments)]
fn handle_graphic_frame_start(
    local_str: &str,
    in_table: &mut bool,
    in_table_row: &mut bool,
    in_table_cell: &mut bool,
    in_cell_paragraph: &mut bool,
    in_cell_run: &mut bool,
    in_cell_text: &mut bool,
    current_cell: &mut String,
    current_row: &mut Vec<String>,
    table_rows: &mut Vec<Vec<String>>,
) {
    if local_str == "tbl" {
        table_rows.clear();
        *in_table = true;
    } else if local_str == "tr" && *in_table {
        current_row.clear();
        *in_table_row = true;
    } else if local_str == "tc" && *in_table_row {
        current_cell.clear();
        *in_table_cell = true;
    } else if local_str == "p" && *in_table_cell {
        // Paragraphs of one cell are flattened onto one line, separated by a
        // single space (none before the first paragraph).
        if !current_cell.is_empty() {
            current_cell.push(' ');
        }
        *in_cell_paragraph = true;
    } else if local_str == "r" && *in_cell_paragraph {
        *in_cell_run = true;
    } else if local_str == "t" && *in_cell_run {
        *in_cell_text = true;
    }
}

/// Handle an Empty event inside a <p:graphicFrame>.
///
/// A `br` seen while a cell run is open adds a single space to the cell text;
/// every other element is ignored.
fn handle_graphic_frame_empty(local_str: &str, in_cell_run: bool, current_cell: &mut String) {
    match (local_str, in_cell_run) {
        ("br", true) => current_cell.push(' '),
        _ => {}
    }
}

/// Handle a Start/Empty event inside a <p:pic>.
///
/// - `blip`: stores the value of the embed attribute (any qualified name
///   ending in `:embed`, normally `r:embed`) as the image relationship ID.
/// - `cNvPr`: stores a non-empty `descr` attribute as the image alt text.
///
/// Attribute values are decoded lossily as UTF-8 and stored as-is. If an
/// attribute occurs more than once, the last occurrence wins.
fn handle_picture_start(
    local_str: &str,
    e: &quick_xml::events::BytesStart,
    current_blip_rel_id: &mut Option<String>,
    current_image_alt: &mut Option<String>,
) {
    if local_str == "blip" {
        for attribute in e.attributes().flatten() {
            let qualified = std::str::from_utf8(attribute.key.as_ref()).unwrap_or("");
            // "r:embed" itself ends with ":embed", so one check covers both.
            if qualified.ends_with(":embed") {
                let rel_id = String::from_utf8_lossy(&attribute.value).into_owned();
                *current_blip_rel_id = Some(rel_id);
            }
        }
    } else if local_str == "cNvPr" {
        for attribute in e.attributes().flatten() {
            let name = attribute.key.local_name();
            if std::str::from_utf8(name.as_ref()).unwrap_or("") != "descr" {
                continue;
            }
            let description = String::from_utf8_lossy(&attribute.value).into_owned();
            if !description.is_empty() {
                *current_image_alt = Some(description);
            }
        }
    }
}

/// Turn the paragraphs collected for a shape into a `ShapeContent`.
///
/// Paragraphs are joined with newlines and the result is trimmed. Returns
/// `None` when nothing but whitespace remains. Title and centered-title
/// placeholders become `ShapeContent::Title`; every other shape, with or
/// without a placeholder, becomes `ShapeContent::Body`.
fn finalize_shape(
    placeholder_type: &Option<PlaceholderType>,
    paragraphs: &[String],
) -> Option<ShapeContent> {
    let joined = paragraphs.join("\n");
    let trimmed = joined.trim();
    if trimmed.is_empty() {
        // Covers both "no paragraphs" and "only whitespace".
        return None;
    }
    let text = trimmed.to_string();

    let content = match placeholder_type {
        Some(PlaceholderType::Title | PlaceholderType::CenterTitle) => ShapeContent::Title(text),
        // Subtitles, body placeholders, unknown placeholders and plain text
        // boxes are all rendered as body text.
        Some(PlaceholderType::SubTitle | PlaceholderType::Body | PlaceholderType::Other)
        | None => ShapeContent::Body(text),
    };
    Some(content)
}

// ---- Notes parsing ----

/// Parse a notes slide XML and extract the body text.
///
/// Only extracts text from the body placeholder (ignores slide number placeholders).
fn parse_notes(xml: &str) -> Option<String> {
    let mut reader = Reader::from_str(xml);

    let mut in_shape = false;
    let mut shape_depth: u32 = 0;
    let mut is_body_placeholder = false;

    let mut in_text_body = false;
    let mut in_paragraph = false;
    let mut in_run = false;
    let mut in_text = false;
    let mut current_paragraph = String::new();
    let mut paragraphs: Vec<String> = Vec::new();

    loop {
        match reader.read_event() {
            Ok(Event::Start(ref e)) => {
                let local = e.local_name();
                let local_str = std::str::from_utf8(local.as_ref()).unwrap_or("");

                if local_str == "sp" && !in_shape {
                    in_shape = true;
                    shape_depth = 1;
                    is_body_placeholder = false;
                    paragraphs.clear();
                } else if in_shape {
                    shape_depth += 1;
                    match local_str {
                        "ph" => {
                            for attr in e.attributes().flatten() {
                                let local_name = attr.key.local_name();
                                let key = std::str::from_utf8(local_name.as_ref()).unwrap_or("");
                                if key == "type" {
                                    let val = String::from_utf8_lossy(&attr.value);
                                    if val.as_ref() == "body" {
                                        is_body_placeholder = true;
                                    }
                                }
                            }
                        }
                        "txBody" => in_text_body = true,
                        "p" if in_text_body => {
                            in_paragraph = true;
                            current_paragraph.clear();
                        }
                        "r" if in_paragraph => in_run = true,
                        "t" if in_run => in_text = true,
                        _ => {}
                    }
                }
            }
            Ok(Event::Empty(ref e)) => {
                let local = e.local_name();
                let local_str = std::str::from_utf8(local.as_ref()).unwrap_or("");

                if in_shape {
                    if local_str == "ph" {
                        for attr in e.attributes().flatten() {
                            let local_name = attr.key.local_name();
                            let key = std::str::from_utf8(local_name.as_ref()).unwrap_or("");
                            if key == "type" {
                                let val = String::from_utf8_lossy(&attr.value);
                                if val.as_ref() == "body" {
                                    is_body_placeholder = true;
                                }
                            }
                        }
                    } else if local_str == "br" && in_run {
                        current_paragraph.push('\n');
                    }
                }
            }
            Ok(Event::Text(ref e)) if in_shape && in_text && in_run => {
                let text = e.unescape().unwrap_or_default().to_string();
                current_paragraph.push_str(&text);
            }
            Ok(Event::End(ref e)) => {
                let local = e.local_name();
                let local_str = std::str::from_utf8(local.as_ref()).unwrap_or("");

                if in_shape {
                    shape_depth -= 1;

                    match local_str {
                        "t" => in_text = false,
                        "r" => {
                            in_run = false;
                            in_text = false;
                        }
                        "p" if in_paragraph => {
                            if !current_paragraph.is_empty() {
                                paragraphs.push(current_paragraph.clone());
                            }
                            current_paragraph.clear();
                            in_paragraph = false;
                        }
                        "txBody" => in_text_body = false,
                        _ => {}
                    }

                    if shape_depth == 0 {
                        if is_body_placeholder && !paragraphs.is_empty() {
                            let text = paragraphs.join("\n").trim().to_string();
                            if !text.is_empty() {
                                return Some(text);
                            }
                        }
                        in_shape = false;
                        is_body_placeholder = false;
                        paragraphs.clear();
                        in_text_body = false;
                        in_paragraph = false;
                        in_run = false;
                        in_text = false;
                    }
                }
            }
            Ok(Event::Eof) => break,
            Err(_) => break,
            _ => {}
        }
    }

    None
}

/// Look up the notes slide referenced by a slide's relationships.
///
/// Returns the raw `target` of a relationship whose type contains
/// `notesSlide`, or `None` if the slide has no such relationship.
fn resolve_notes_path(slide_rels: &HashMap<String, Relationship>) -> Option<String> {
    slide_rels
        .values()
        .find(|rel| rel.rel_type.contains("notesSlide"))
        .map(|rel| rel.target.clone())
}

// ---- Markdown rendering ----

/// Render a single slide's content as both Markdown and plain text.
///
/// - The heading is `## Slide N: Title`, using the first title shape, or
///   `## Slide N` when there is none. A title spanning several lines is
///   flattened to one line for the heading, because a Markdown ATX heading
///   cannot continue onto a second line. The plain-text output keeps the
///   title unchanged.
/// - Body text, tables and images follow in shape order.
/// - Images are emitted with unique placeholder alt text `__img_N__`;
///   `image_counter` is incremented for each image to ensure uniqueness.
///   Images whose relationship ID is missing from `image_filenames` are skipped.
/// - Speaker notes are appended as a blockquote starting with `> Note: `
///   (without the blockquote prefix in plain text).
///
/// `slide_key` is combined with the relationship ID (`{slide_key}::{rel_id}`)
/// to form each image's `bytes_key`.
///
/// Returns `(markdown, plain_text, image_infos)`, with trailing whitespace
/// trimmed from both texts.
fn render_slide(
    number: usize,
    shapes: &[ShapeContent],
    notes: &Option<String>,
    image_filenames: &HashMap<String, String>,
    slide_key: &str,
    image_counter: &mut usize,
) -> (String, String, Vec<ImageInfo>) {
    let mut out = String::new();
    let mut plain = String::new();
    let mut image_infos: Vec<ImageInfo> = Vec::new();

    // Find the title
    let title = shapes.iter().find_map(|s| match s {
        ShapeContent::Title(t) => Some(t.as_str()),
        _ => None,
    });

    // Slide heading
    if let Some(title_text) = title {
        // Join the title's lines with single spaces so the heading stays on
        // one line. A single-line title passes through unchanged.
        let heading_title = title_text
            .lines()
            .map(str::trim)
            .filter(|line| !line.is_empty())
            .collect::<Vec<_>>()
            .join(" ");
        out.push_str(&format!("## Slide {number}: {heading_title}\n\n"));
        plain.push_str(&format!("{title_text}\n\n"));
    } else {
        out.push_str(&format!("## Slide {number}\n\n"));
        plain.push('\n');
    }

    // Body content, tables, and images (skip title since it's already in heading)
    for shape in shapes {
        match shape {
            ShapeContent::Title(_) => {} // Already rendered as heading
            ShapeContent::Body(text) => {
                out.push_str(text);
                out.push_str("\n\n");
                plain.push_str(text);
                plain.push_str("\n\n");
            }
            ShapeContent::Table { headers, rows } => {
                let header_refs: Vec<&str> = headers.iter().map(|s| s.as_str()).collect();
                let row_refs: Vec<Vec<&str>> = rows
                    .iter()
                    .map(|r| r.iter().map(|s| s.as_str()).collect())
                    .collect();
                out.push_str(&build_table(&header_refs, &row_refs));
                out.push('\n');
                plain.push_str(&build_table_plain(&header_refs, &row_refs));
                plain.push('\n');
            }
            ShapeContent::Image { rel_id, alt_text } => {
                if let Some(filename) = image_filenames.get(rel_id) {
                    let placeholder = format!("__img_{n}__", n = *image_counter);
                    *image_counter += 1;
                    out.push_str(&format!("![{placeholder}]({filename})\n\n"));
                    // Plain text: image description placeholder (resolved later)
                    plain.push_str(&format!("{placeholder}\n\n"));
                    image_infos.push(ImageInfo {
                        placeholder,
                        original_alt: alt_text.as_deref().unwrap_or("").to_string(),
                        filename: filename.clone(),
                        bytes_key: format!("{slide_key}::{rel_id}"),
                    });
                }
            }
        }
    }

    // Notes
    if let Some(notes_text) = notes {
        let mut lines = notes_text.lines();
        if let Some(first) = lines.next() {
            out.push_str("> Note: ");
            out.push_str(first);
            for line in lines {
                out.push_str("\n> ");
                out.push_str(line);
            }
            out.push_str("\n\n");
            // Plain text: notes without blockquote prefix
            plain.push_str(notes_text);
            plain.push_str("\n\n");
        }
    }

    // Trim trailing whitespace
    (
        out.trim_end().to_string(),
        plain.trim_end().to_string(),
        image_infos,
    )
}

// ---- Converter trait impl ----

// ---- Comment extraction ----

/// Build an `id` -> `name` map from a comment-author registry.
///
/// `elem` is the author element's local name — `cmAuthor` for the legacy
/// `ppt/commentAuthors.xml` (`<p:cmAuthor id="0" name="Julie Lee"/>`) or
/// `author` for the modern `ppt/authors.xml` (`<p188:author id="{GUID}"
/// name="Julie Lee"/>`). Names are XML-unescaped.
///
/// Elements without an `id` are skipped; a missing `name` is stored as an
/// empty string. Parsing stops silently at the first XML error, keeping the
/// authors read so far.
fn parse_author_registry(xml: &str, elem: &str) -> HashMap<String, String> {
    let mut registry = HashMap::new();
    let mut reader = Reader::from_str(xml);

    loop {
        match reader.read_event() {
            Ok(Event::Eof) | Err(_) => break,
            Ok(Event::Start(ref tag)) | Ok(Event::Empty(ref tag)) => {
                let local = tag.local_name();
                let tag_name = std::str::from_utf8(local.as_ref()).unwrap_or("");
                if tag_name != elem {
                    continue;
                }
                let Some(id) = attr_value_unescaped(tag, "id") else {
                    continue;
                };
                let name = attr_value_unescaped(tag, "name").unwrap_or_default();
                registry.insert(id, name);
            }
            Ok(_) => {}
        }
    }

    registry
}

/// A raw PPTX comment before it is turned into a rendered [`Comment`].
#[derive(Debug, Clone)]
struct RawPptxComment {
    /// Author display name resolved through the author registry; empty when
    /// the author ID is unknown or absent.
    author: String,
    /// Timestamp string as found in the comment XML (the `dt` attribute for
    /// legacy comments); empty when absent.
    date: String,
    /// Comment text.
    body: String,
    /// Whether this comment is a reply to another comment. Legacy comments
    /// are never replies.
    is_reply: bool,
}

/// Parse a legacy comment file (`ppt/comments/commentN.xml`).
///
/// Comments are `<p:cm authorId="0" dt="..." idx="1">` with the body as plain
/// text in a `<p:text>` child. Legacy comments are never replies.
fn parse_legacy_comments(xml: &str, authors: &HashMap<String, String>) -> Vec<RawPptxComment> {
    let mut reader = Reader::from_str(xml);
    let mut out = Vec::new();

    let mut author = String::new();
    let mut date = String::new();
    let mut body = String::new();
    let mut in_cm = false;
    let mut in_text = false;

    loop {
        match reader.read_event() {
            // Only Event::Start sets in_text: a self-closing `<p:text/>` (Empty,
            // no End) would otherwise leave in_text stuck true and leak later
            // text into the body.
            Ok(Event::Start(ref e)) => {
                let local = e.local_name();
                let local_str = std::str::from_utf8(local.as_ref()).unwrap_or("");
                match local_str {
                    "cm" => {
                        in_cm = true;
                        author = String::new();
                        date = String::new();
                        body = String::new();
                        if let Some(v) = attr_value_unescaped(e, "authorId") {
                            author = authors.get(&v).cloned().unwrap_or_default();
                        }
                        if let Some(v) = attr_value_unescaped(e, "dt") {
                            date = v;
                        }
                    }
                    "text" if in_cm => in_text = true,
                    _ => {}
                }
            }
            // A self-closing `<p:cm .../>` (no children) still yields a comment.
            Ok(Event::Empty(ref e)) => {
                let local = e.local_name();
                if std::str::from_utf8(local.as_ref()).unwrap_or("") == "cm" {
                    let author = attr_value_unescaped(e, "authorId")
                        .and_then(|v| authors.get(&v).cloned())
                        .unwrap_or_default();
                    let date = attr_value_unescaped(e, "dt").unwrap_or_default();
                    out.push(RawPptxComment {
                        author,
                        date,
                        body: String::new(),
                        is_reply: false,
                    });
                }
            }
            Ok(Event::Text(ref e)) if in_text => {
                body.push_str(&e.unescape().unwrap_or_default());
            }
            Ok(Event::End(ref e)) => {
                let local = e.local_name();
                let local_str = std::str::from_utf8(local.as_ref()).unwrap_or("");
                match local_str {
                    "text" => in_text = false,
                    "cm" if in_cm => {
                        out.push(RawPptxComment {
                            author: std::mem::take(&mut author),
                            date: std::mem::take(&mut date),
                            body: std::mem::take(&mut body),
                            is_reply: false,
                        });
                        in_cm = false;
                    }
                    _ => {}
                }
            }
            Ok(Event::Eof) => break,
            Err(_) => break,
            _ => {}
        }
    }

    out
}

/// Parse a modern comment file (`ppt/comments/modernComment_*.xml`).
///
/// Comments are `<p188:cm authorId="{GUID}" created="...">` whose body lives in
/// DrawingML `<a:t>` runs inside `<p188:txBody>`. Replies are nested inside a
/// `<p188:replyLst>` and are marked `is_reply = true`. Top-level comments and
/// replies are emitted in document order (a parent precedes its replies).
///
/// `authorId` is resolved through `authors`; an unknown or missing id yields an
/// empty author. Runs within one paragraph are concatenated as-is, while
/// paragraph boundaries (`</a:p>`) and line breaks (`<a:br/>`) are recorded as a
/// single `\n` between the surrounding text, so multi-line comments do not have
/// their lines fused together. No leading or trailing newline is ever added.
///
/// Parsing is best-effort: a reader error ends the scan, and comments whose end
/// tag was never reached are dropped.
fn parse_modern_comments(xml: &str, authors: &HashMap<String, String>) -> Vec<RawPptxComment> {
    /// One in-progress comment on the parse stack.
    struct Frame {
        /// Document start order, so a parent (whose start tag precedes its
        /// reply's) is emitted before its replies even though it closes later.
        seq: usize,
        author: String,
        date: String,
        body: String,
        is_reply: bool,
        /// A paragraph/line break was seen after some text; it is written as
        /// `\n` only once more text actually follows.
        break_pending: bool,
    }

    impl Frame {
        /// Remember a break, but only if there is text for it to separate from.
        fn mark_break(&mut self) {
            if !self.body.is_empty() {
                self.break_pending = true;
            }
        }

        /// Append run text, flushing any pending break first.
        fn push_text(&mut self, text: &str) {
            if text.is_empty() {
                return;
            }
            if self.break_pending {
                self.body.push('\n');
                self.break_pending = false;
            }
            self.body.push_str(text);
        }
    }

    let mut reader = Reader::from_str(xml);

    // Comments nest: a `cm`'s `replyLst` (and its `reply` children) appears
    // before the `cm`'s own `txBody`. A stack keeps each comment's captured
    // attributes and body separate; text appends to the innermost open frame.
    // Finished frames are collected with their start `seq`, then sorted so the
    // emit order is document order (parent before reply).
    let mut stack: Vec<Frame> = Vec::new();
    let mut finished: Vec<Frame> = Vec::new();
    let mut next_seq: usize = 0;
    let mut in_text = false;

    // Build a Frame from a cm/reply element's attributes.
    let make_frame = |e: &quick_xml::events::BytesStart, is_reply: bool, seq: usize| {
        let author = attr_value_unescaped(e, "authorId")
            .and_then(|v| authors.get(&v).cloned())
            .unwrap_or_default();
        let date = attr_value_unescaped(e, "created").unwrap_or_default();
        Frame {
            seq,
            author,
            date,
            body: String::new(),
            is_reply,
            break_pending: false,
        }
    };

    loop {
        match reader.read_event() {
            Ok(Event::Start(ref e)) => {
                let local = e.local_name();
                let local_str = std::str::from_utf8(local.as_ref()).unwrap_or("");
                match local_str {
                    "cm" | "reply" => {
                        stack.push(make_frame(e, local_str == "reply", next_seq));
                        next_seq += 1;
                    }
                    // Only Event::Start sets in_text: a self-closing `<a:t/>`
                    // (Empty, no End) would otherwise leak later text into the
                    // innermost open frame's body.
                    "t" if !stack.is_empty() => in_text = true,
                    _ => {}
                }
            }
            Ok(Event::Empty(ref e)) => {
                let local = e.local_name();
                let local_str = std::str::from_utf8(local.as_ref()).unwrap_or("");
                match local_str {
                    // A self-closing `<p188:cm/>`/`<p188:reply/>` (no body) is
                    // a complete empty comment.
                    "cm" | "reply" => {
                        finished.push(make_frame(e, local_str == "reply", next_seq));
                        next_seq += 1;
                    }
                    // `<a:br/>` is a soft line break inside a paragraph.
                    "br" => {
                        if let Some(frame) = stack.last_mut() {
                            frame.mark_break();
                        }
                    }
                    _ => {}
                }
            }
            Ok(Event::Text(ref e)) if in_text => {
                if let Some(frame) = stack.last_mut() {
                    frame.push_text(&e.unescape().unwrap_or_default());
                }
            }
            Ok(Event::End(ref e)) => {
                let local = e.local_name();
                let local_str = std::str::from_utf8(local.as_ref()).unwrap_or("");
                match local_str {
                    "t" => in_text = false,
                    // End of a paragraph, or of a `<a:br>` written with
                    // children: later text starts on a new line.
                    "p" | "br" => {
                        if let Some(frame) = stack.last_mut() {
                            frame.mark_break();
                        }
                    }
                    "cm" | "reply" => {
                        if let Some(frame) = stack.pop() {
                            finished.push(frame);
                        }
                    }
                    _ => {}
                }
            }
            Ok(Event::Eof) => break,
            Err(_) => break,
            _ => {}
        }
    }

    finished.sort_by_key(|f| f.seq);
    finished
        .into_iter()
        .map(|frame| RawPptxComment {
            author: frame.author,
            date: frame.date,
            body: frame.body,
            is_reply: frame.is_reply,
        })
        .collect()
}

/// Produce the `source` label attached to every comment on a slide.
///
/// The label is `Slide N: Title` when a non-empty title is available and plain
/// `Slide N` otherwise. Whitespace inside the title is collapsed first (titles
/// spanning several paragraphs or containing `<a:br/>` carry newlines), and the
/// finished label is capped at `comments::SOURCE_CAP`, so the rendered
/// `- **source**:` line always stays on one line.
fn slide_label(number: usize, title: Option<&str>) -> String {
    // A title that collapses to nothing is treated the same as no title.
    let flattened = title
        .map(comments::collapse_ws)
        .filter(|text| !text.is_empty());

    let label = if let Some(t) = flattened {
        format!("Slide {number}: {t}")
    } else {
        format!("Slide {number}")
    };

    comments::cap_text(&label, comments::SOURCE_CAP)
}

/// Turn the raw comments parsed from one slide into rendered [`Comment`]s.
///
/// Every resulting comment carries the same `source` label. The author line is
/// produced by `comments::format_author` from the raw author and date, and the
/// body has its whitespace collapsed. Input order is preserved.
fn build_pptx_comments(raw: Vec<RawPptxComment>, source: &str) -> Vec<Comment> {
    let mut rendered = Vec::with_capacity(raw.len());
    for item in raw {
        rendered.push(Comment {
            author: comments::format_author(&item.author, &item.date),
            body: comments::collapse_ws(&item.body),
            source: source.to_string(),
            is_reply: item.is_reply,
        });
    }
    rendered
}

// ---- Internal conversion (parse + image extraction, no resolution) ----

impl PptxConverter {
    /// Parse the presentation and extract images without resolving placeholders.
    ///
    /// Returns the conversion result (with unresolved placeholders in markdown),
    /// pending image data for later resolution (sync or async), and any extracted
    /// comments (empty unless `options.extract_comments` is set). Comments are
    /// appended to the output by the caller, after image placeholders resolve.
    ///
    /// A presentation with no resolvable slides yields an empty result rather
    /// than an error. Slides are rendered in the order returned by
    /// `resolve_slide_order` and joined with `---` rules in the markdown output
    /// and blank lines in the plain-text output.
    ///
    /// # Errors
    ///
    /// Returns an error when `data` cannot be opened as a ZIP archive, when
    /// `validate_zip_budget` rejects the archive, when reading any text part
    /// fails, or when `ppt/presentation.xml` is missing
    /// ([`ConvertError::MalformedDocument`]). A missing slide part is not an
    /// error: it is skipped with a `SkippedElement` warning. Image parts that
    /// are missing or fail to read are silently ignored.
    pub(crate) fn convert_inner(
        &self,
        data: &[u8],
        options: &ConversionOptions,
    ) -> Result<(ConversionResult, PendingImageResolution, Vec<Comment>), ConvertError> {
        let cursor = Cursor::new(data);
        let mut archive = ZipArchive::new(cursor)?;

        // Check the archive against the configured uncompressed-size limit
        // before any part is read.
        crate::zip_utils::validate_zip_budget(&mut archive, options.max_uncompressed_zip_bytes)?;

        let mut warnings: Vec<ConversionWarning> = Vec::new();
        // (filename, bytes) pairs returned to the caller; filled only when
        // `options.extract_images` is set.
        let mut images: Vec<(String, Vec<u8>)> = Vec::new();

        // 1. Parse presentation.xml.rels (optional but needed for slide resolution)
        let pres_rels = match read_zip_text(&mut archive, "ppt/_rels/presentation.xml.rels")? {
            Some(xml) => parse_relationships(&xml),
            None => HashMap::new(),
        };

        // 2. Parse presentation.xml (required)
        let pres_xml = read_zip_text(&mut archive, "ppt/presentation.xml")?.ok_or_else(|| {
            ConvertError::MalformedDocument {
                reason: "missing ppt/presentation.xml".to_string(),
            }
        })?;

        // 3. Resolve slide order
        let slides = resolve_slide_order(&pres_xml, &pres_rels);

        // Nothing to render: return an empty (but successful) result.
        if slides.is_empty() {
            return Ok((
                ConversionResult {
                    markdown: String::new(),
                    ..Default::default()
                },
                PendingImageResolution::default(),
                Vec::new(),
            ));
        }

        // Load comment author registries (legacy + modern) once, if requested.
        // Both are merged into one id -> name map; the modern registry is
        // applied second, so it wins if the same id appears in both.
        let comment_authors = if options.extract_comments {
            let mut map = HashMap::new();
            if let Some(xml) = read_zip_text_lossy(&mut archive, "ppt/commentAuthors.xml")? {
                map.extend(parse_author_registry(&xml, "cmAuthor"));
            }
            if let Some(xml) = read_zip_text_lossy(&mut archive, "ppt/authors.xml")? {
                map.extend(parse_author_registry(&xml, "author"));
            }
            map
        } else {
            HashMap::new()
        };

        // 4. Process each slide — collect all image infos and bytes across slides
        let mut slide_markdowns: Vec<String> = Vec::new();
        let mut slide_plains: Vec<String> = Vec::new();
        let mut document_title: Option<String> = None;
        // Running total of image bytes read so far, across all slides.
        let mut total_image_bytes: usize = 0;
        // Shared with `render_slide` across slides.
        let mut image_counter: usize = 0;
        let mut all_image_infos: Vec<ImageInfo> = Vec::new();
        // Image bytes keyed by "<slide path>::<relationship id>".
        let mut all_image_bytes: HashMap<String, Vec<u8>> = HashMap::new();
        let mut all_comments: Vec<Comment> = Vec::new();

        for slide_info in &slides {
            // Read slide XML
            let slide_xml = match read_zip_text(&mut archive, &slide_info.path)? {
                Some(xml) => xml,
                None => {
                    // The slide is listed in the presentation but its part is
                    // absent: warn and move on to the next slide.
                    warnings.push(ConversionWarning {
                        code: WarningCode::SkippedElement,
                        message: format!("slide file not found: {}", slide_info.path),
                        location: Some(slide_info.path.clone()),
                    });
                    continue;
                }
            };

            // Parse slide content
            let (shapes, mut slide_warnings) = parse_slide(&slide_xml);
            warnings.append(&mut slide_warnings);

            // Read slide rels for notes and images
            let slide_rels_path = derive_rels_path(&slide_info.path);
            let slide_rels = match read_zip_text(&mut archive, &slide_rels_path)? {
                Some(xml) => parse_relationships(&xml),
                None => HashMap::new(),
            };

            // Parse notes
            let notes = if let Some(notes_target) = resolve_notes_path(&slide_rels) {
                let notes_path = resolve_relative_to_file(&slide_info.path, &notes_target);
                match read_zip_text(&mut archive, &notes_path)? {
                    Some(xml) => parse_notes(&xml),
                    None => None,
                }
            } else {
                None
            };

            // Extract comments anchored to this slide (legacy + modern schemes).
            if options.extract_comments {
                // The first title shape on the slide, if any, names the source.
                let slide_title = shapes.iter().find_map(|s| match s {
                    ShapeContent::Title(t) => Some(t.as_str()),
                    _ => None,
                });
                let source = slide_label(slide_info.number, slide_title);
                // Collect comment-part targets, sorted by path for deterministic
                // ordering when a slide references more than one comment file.
                // The modern scheme uses the office/2018/10 namespace; legacy
                // uses the 2006 one.
                let mut comment_targets: Vec<(String, bool)> = slide_rels
                    .values()
                    .filter(|rel| rel.rel_type.contains("comments"))
                    .map(|rel| {
                        let path = resolve_relative_to_file(&slide_info.path, &rel.target);
                        (path, rel.rel_type.contains("2018"))
                    })
                    .collect();
                comment_targets.sort();
                // A slide can carry BOTH a modern and a legacy comment part for
                // back-compat, describing the same threads. Prefer modern and
                // skip legacy in that case to avoid double-reporting.
                let has_modern = comment_targets.iter().any(|(_, m)| *m);
                for (path, is_modern) in comment_targets {
                    if has_modern && !is_modern {
                        continue;
                    }
                    // A relationship pointing at a missing part is ignored.
                    let Some(xml) = read_zip_text_lossy(&mut archive, &path)? else {
                        continue;
                    };
                    let raw = if is_modern {
                        parse_modern_comments(&xml, &comment_authors)
                    } else {
                        parse_legacy_comments(&xml, &comment_authors)
                    };
                    all_comments.extend(build_pptx_comments(raw, &source));
                }
            }

            // Resolve image filenames and optionally extract image data
            let need_image_bytes = options.extract_images || options.image_describer.is_some();
            // Relationship id -> file name (last path segment of the target).
            let mut image_filenames: HashMap<String, String> = HashMap::new();
            for shape in &shapes {
                if let ShapeContent::Image { rel_id, .. } = shape
                    && let Some(rel) = slide_rels.get(rel_id)
                {
                    let image_path = resolve_relative_to_file(&slide_info.path, &rel.target);
                    let filename = image_path.rsplit('/').next().unwrap_or(&image_path);
                    image_filenames.insert(rel_id.clone(), filename.to_string());

                    // Bytes are read only while the running total is still
                    // under the limit; a read error or missing part is skipped
                    // without a warning.
                    if need_image_bytes
                        && total_image_bytes < options.max_total_image_bytes
                        && let Ok(Some(img_data)) = read_zip_bytes(&mut archive, &image_path)
                    {
                        // The size is counted even when this image turns out to
                        // exceed the limit, which also stops all later reads.
                        total_image_bytes += img_data.len();
                        if total_image_bytes <= options.max_total_image_bytes {
                            if options.extract_images {
                                images.push((filename.to_string(), img_data.clone()));
                            }
                            let bytes_key = format!("{}::{}", slide_info.path, rel_id);
                            all_image_bytes.insert(bytes_key, img_data);
                        } else {
                            warnings.push(ConversionWarning {
                                code: WarningCode::ResourceLimitReached,
                                message: format!(
                                    "total image bytes exceeded limit ({})",
                                    options.max_total_image_bytes
                                ),
                                location: Some(image_path),
                            });
                        }
                    }
                }
            }

            // Set document title from first slide's title
            // (more precisely: from the first processed slide that has a title
            // shape, since this is retried until a title is found).
            if document_title.is_none() {
                document_title = shapes.iter().find_map(|s| {
                    if let ShapeContent::Title(t) = s {
                        Some(t.clone())
                    } else {
                        None
                    }
                });
            }

            let (slide_md, slide_plain, slide_image_infos) = render_slide(
                slide_info.number,
                &shapes,
                &notes,
                &image_filenames,
                &slide_info.path,
                &mut image_counter,
            );

            all_image_infos.extend(slide_image_infos);
            slide_markdowns.push(slide_md);
            slide_plains.push(slide_plain);
        }

        // Join slides with horizontal rule separator (markdown) or blank line (plain text)
        // Non-empty output gets exactly one trailing newline.
        let markdown = slide_markdowns.join("\n\n---\n\n");
        let markdown = if markdown.is_empty() {
            markdown
        } else {
            format!("{markdown}\n")
        };

        let plain_text = slide_plains.join("\n\n");
        let plain_text = if plain_text.is_empty() {
            plain_text
        } else {
            format!("{plain_text}\n")
        };

        let result = ConversionResult {
            markdown,
            plain_text,
            title: document_title,
            images,
            warnings,
        };

        // Handed to `resolve_image_placeholders` by the caller.
        let pending = PendingImageResolution {
            infos: all_image_infos,
            bytes: all_image_bytes,
        };

        Ok((result, pending, all_comments))
    }
}

// ---- Converter trait impl ----

impl Converter for PptxConverter {
    fn supported_extensions(&self) -> &[&str] {
        &["pptx"]
    }

    fn convert(
        &self,
        data: &[u8],
        options: &ConversionOptions,
    ) -> Result<ConversionResult, ConvertError> {
        let (mut result, pending, doc_comments) = self.convert_inner(data, options)?;
        resolve_image_placeholders(
            &mut result.markdown,
            &mut result.plain_text,
            &pending.infos,
            &pending.bytes,
            options.image_describer.as_deref(),
            &mut result.warnings,
        );
        comments::append_comments(&mut result.markdown, &mut result.plain_text, &doc_comments);
        Ok(result)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // ---- Helper: build minimal PPTX ZIP in memory ----

    /// Description of one slide for `build_test_pptx`.
    ///
    /// All text is interpolated into the generated XML verbatim, so values
    /// must already be valid XML character data.
    struct TestSlide<'a> {
        /// Text of the `title` placeholder shape; `None` omits the shape.
        title: Option<&'a str>,
        /// One `body` placeholder shape is emitted per entry.
        body_texts: Vec<&'a str>,
        /// Speaker notes; each line becomes its own paragraph in the notes slide.
        notes: Option<&'a str>,
        /// Optional table rendered as a graphic frame.
        table: Option<TestTable<'a>>,
        // Each value is written as the picture's `r:embed`. The generated rels
        // file always names its image relationships `rIdImg1`, `rIdImg2`, ...
        // by position, so use those ids when the image must resolve.
        images: Vec<&'a str>,                  // rel IDs for image references
        // A missing entry or `None` omits the `descr` attribute.
        image_alt_texts: Vec<Option<&'a str>>, // alt text per image (parallel to images)
    }

    /// Description of a table placed on a [`TestSlide`].
    struct TestTable<'a> {
        /// Cells of the first `<a:tr>` row.
        headers: Vec<&'a str>,
        /// Remaining rows, each emitted as its own `<a:tr>`.
        rows: Vec<Vec<&'a str>>,
    }

    /// Assemble an in-memory PPTX archive from slide descriptions.
    ///
    /// The archive contains `[Content_Types].xml`, then for each slide (numbered
    /// from 1) its slide part, a rels part when the slide has notes or images,
    /// and a notes-slide part when notes are present, and finally
    /// `ppt/presentation.xml` with its rels. Image relationships are declared
    /// as `rIdImg1`, `rIdImg2`, ... pointing at `../media/imageN.png`; no media
    /// files are actually written.
    fn build_test_pptx(slides: &[TestSlide]) -> Vec<u8> {
        use std::io::Write;
        use zip::ZipWriter;
        use zip::write::SimpleFileOptions;

        let file_opts = SimpleFileOptions::default();
        let mut archive = ZipWriter::new(Cursor::new(Vec::<u8>::new()));
        // Every part is written the same way: open the entry, dump the text.
        let mut add_part = |name: String, contents: &str| {
            archive.start_file(name, file_opts).unwrap();
            archive.write_all(contents.as_bytes()).unwrap();
        };

        // [Content_Types].xml
        let content_types = [
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#,
            r#"<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">"#,
            r#"<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>"#,
            r#"<Default Extension="xml" ContentType="application/xml"/>"#,
            "</Types>",
        ]
        .concat();
        add_part("[Content_Types].xml".to_string(), &content_types);

        // presentation.xml and its rels grow by one entry per slide.
        let mut pres_xml = String::from(
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:presentation xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"><p:sldIdLst>"#,
        );
        let mut pres_rels_xml = String::from(
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">"#,
        );

        for (index, slide) in slides.iter().enumerate() {
            let slide_num = index + 1;
            let slide_id = 256 + index;
            let rid = format!("rId{slide_num}");

            pres_xml += &format!(r#"<p:sldId id="{slide_id}" r:id="{rid}"/>"#);
            pres_rels_xml += &format!(
                r#"<Relationship Id="{rid}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" Target="slides/slide{slide_num}.xml"/>"#
            );

            // Slide part.
            add_part(
                format!("ppt/slides/slide{slide_num}.xml"),
                &build_slide_xml(slide),
            );

            // Slide rels are only needed when there is something to point at.
            let has_notes = slide.notes.is_some();
            if has_notes || !slide.images.is_empty() {
                let mut slide_rels = String::from(
                    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">"#,
                );

                if has_notes {
                    slide_rels += &format!(
                        r#"<Relationship Id="rIdNotes" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide" Target="../notesSlides/notesSlide{slide_num}.xml"/>"#
                    );
                }

                for img_no in 1..=slide.images.len() {
                    let img_rid = format!("rIdImg{}", img_no);
                    slide_rels += &format!(
                        r#"<Relationship Id="{img_rid}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="../media/image{}.png"/>"#,
                        img_no
                    );
                }

                slide_rels += "</Relationships>";
                add_part(
                    format!("ppt/slides/_rels/slide{slide_num}.xml.rels"),
                    &slide_rels,
                );
            }

            // Notes slide part, if the slide has speaker notes.
            if let Some(notes_text) = slide.notes {
                add_part(
                    format!("ppt/notesSlides/notesSlide{slide_num}.xml"),
                    &build_notes_xml(notes_text),
                );
            }
        }

        pres_xml += "</p:sldIdLst></p:presentation>";
        pres_rels_xml += "</Relationships>";

        add_part("ppt/presentation.xml".to_string(), &pres_xml);
        add_part("ppt/_rels/presentation.xml.rels".to_string(), &pres_rels_xml);

        archive.finish().unwrap().into_inner()
    }

    /// Render one [`TestSlide`] as a `<p:sld>` document.
    ///
    /// Shapes are emitted in a fixed order: the title placeholder (if any), one
    /// body placeholder per body text, the table (if any), then one picture per
    /// image reference. Text is inserted verbatim, without XML escaping.
    fn build_slide_xml(slide: &TestSlide) -> String {
        /// One `<a:tr>` row with a text cell per entry.
        fn row_xml(cells: &[&str]) -> String {
            let mut row = String::from("<a:tr>");
            for cell in cells {
                row += &format!(
                    r#"<a:tc><a:txBody><a:p><a:r><a:t>{cell}</a:t></a:r></a:p></a:txBody></a:tc>"#
                );
            }
            row += "</a:tr>";
            row
        }

        let mut doc = String::from(
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"><p:cSld><p:spTree>"#,
        );

        // Title placeholder.
        if let Some(title) = slide.title {
            doc += &format!(
                r#"<p:sp><p:nvSpPr><p:cNvPr id="1" name="Title"/><p:cNvSpPr/><p:nvPr><p:ph type="title"/></p:nvPr></p:nvSpPr><p:txBody><a:p><a:r><a:t>{title}</a:t></a:r></a:p></p:txBody></p:sp>"#
            );
        }

        // Body placeholders, one shape per text.
        for text in slide.body_texts.iter() {
            doc += &format!(
                r#"<p:sp><p:nvSpPr><p:cNvPr id="2" name="Content"/><p:cNvSpPr/><p:nvPr><p:ph type="body"/></p:nvPr></p:nvSpPr><p:txBody><a:p><a:r><a:t>{text}</a:t></a:r></a:p></p:txBody></p:sp>"#
            );
        }

        // Table: header row first, then the data rows.
        if let Some(table) = &slide.table {
            doc += r#"<p:graphicFrame><p:nvGraphicFramePr><p:cNvPr id="3" name="Table"/><p:cNvGraphicFramePr/><p:nvPr/></p:nvGraphicFramePr><a:graphic><a:graphicData><a:tbl>"#;
            doc += &row_xml(table.headers.as_slice());
            for row in table.rows.iter() {
                doc += &row_xml(row.as_slice());
            }
            doc += "</a:tbl></a:graphicData></a:graphic></p:graphicFrame>";
        }

        // Pictures; ids start at 10 and `descr` appears only when alt text exists.
        for (idx, rel_id) in slide.images.iter().enumerate() {
            let descr_attr = match slide.image_alt_texts.get(idx) {
                Some(Some(alt)) => format!(r#" descr="{alt}""#),
                _ => String::new(),
            };
            doc += &format!(
                r#"<p:pic><p:nvPicPr><p:cNvPr id="{}"{descr_attr} name="Picture"/><p:cNvPicPr/><p:nvPr/></p:nvPicPr><p:blipFill><a:blip r:embed="{rel_id}"/></p:blipFill></p:pic>"#,
                10 + idx
            );
        }

        doc += "</p:spTree></p:cSld></p:sld>";
        doc
    }

    /// Render speaker notes as a `<p:notes>` document.
    ///
    /// The document holds a slide-number placeholder (fixed text `1`) followed
    /// by a body placeholder in which every line of `text` becomes its own
    /// `<a:p>` paragraph. Text is inserted verbatim, without XML escaping.
    fn build_notes_xml(text: &str) -> String {
        // One paragraph per input line, concatenated in order.
        let para_xml: String = text
            .lines()
            .map(|p| format!(r#"<a:p><a:r><a:t>{p}</a:t></a:r></a:p>"#))
            .collect();

        format!(
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:notes xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p:cSld><p:spTree><p:sp><p:nvSpPr><p:cNvPr id="1" name="Slide Number"/><p:cNvSpPr/><p:nvPr><p:ph type="sldNum"/></p:nvPr></p:nvSpPr><p:txBody><a:p><a:r><a:t>1</a:t></a:r></a:p></p:txBody></p:sp><p:sp><p:nvSpPr><p:cNvPr id="2" name="Notes"/><p:cNvSpPr/><p:nvPr><p:ph type="body"/></p:nvPr></p:nvSpPr><p:txBody>{para_xml}</p:txBody></p:sp></p:spTree></p:cSld></p:notes>"#
        )
    }

    // ---- Tests ----

    #[test]
    fn test_pptx_supported_extensions() {
        // The converter advertises exactly one extension.
        let pptx = PptxConverter;
        let advertised = pptx.supported_extensions();
        assert_eq!(advertised, &["pptx"]);
    }

    #[test]
    fn test_pptx_can_convert() {
        let pptx = PptxConverter;

        // Its own extension is accepted...
        assert!(pptx.can_convert("pptx", &[]));

        // ...and the extensions of other formats are rejected.
        for foreign in ["docx", "xlsx", "pdf"] {
            assert!(!pptx.can_convert(foreign, &[]));
        }
    }

    #[test]
    fn test_pptx_invalid_data_returns_error() {
        // Bytes that are not a ZIP archive must surface as an error.
        let garbage: &[u8] = b"not a valid pptx file";
        let outcome = PptxConverter.convert(garbage, &ConversionOptions::default());
        assert!(outcome.is_err());
    }

    #[test]
    fn test_pptx_empty_presentation() {
        // A deck without slides converts to empty output with no title.
        let empty_deck = build_test_pptx(&[]);
        let options = ConversionOptions::default();
        let output = PptxConverter.convert(&empty_deck, &options).unwrap();

        assert_eq!(output.markdown, "");
        assert!(output.title.is_none());
    }

    #[test]
    fn test_pptx_single_slide_title_and_body() {
        let slide = TestSlide {
            title: Some("Hello World"),
            body_texts: vec!["This is the body text."],
            notes: None,
            table: None,
            images: Vec::new(),
            image_alt_texts: Vec::new(),
        };
        let deck = build_test_pptx(&[slide]);

        let markdown = PptxConverter
            .convert(&deck, &ConversionOptions::default())
            .unwrap()
            .markdown;

        // Heading carries the slide number and title; the body text follows.
        for expected in ["## Slide 1: Hello World", "This is the body text."] {
            assert!(markdown.contains(expected));
        }
    }

    #[test]
    fn test_pptx_multiple_slides_with_separator() {
        // Text-only slide with a title and a single body paragraph.
        let text_slide = |title: &'static str, body: &'static str| TestSlide {
            title: Some(title),
            body_texts: vec![body],
            notes: None,
            table: None,
            images: Vec::new(),
            image_alt_texts: Vec::new(),
        };
        let deck = build_test_pptx(&[
            text_slide("First", "Body one."),
            text_slide("Second", "Body two."),
        ]);

        let markdown = PptxConverter
            .convert(&deck, &ConversionOptions::default())
            .unwrap()
            .markdown;

        // Both headings are present and a horizontal rule separates the slides.
        for expected in ["## Slide 1: First", "## Slide 2: Second", "\n\n---\n\n"] {
            assert!(markdown.contains(expected));
        }
    }

    #[test]
    fn test_pptx_slide_without_title() {
        let untitled = TestSlide {
            title: None,
            body_texts: vec!["Just body text."],
            notes: None,
            table: None,
            images: Vec::new(),
            image_alt_texts: Vec::new(),
        };
        let deck = build_test_pptx(&[untitled]);

        let markdown = PptxConverter
            .convert(&deck, &ConversionOptions::default())
            .unwrap()
            .markdown;

        // The heading is the bare slide number...
        assert!(markdown.contains("## Slide 1\n"));
        // ...with no colon, because there is no title to follow it.
        assert!(!markdown.contains("## Slide 1:"));
    }

    #[test]
    fn test_pptx_title_center_title() {
        // A `ctrTitle` placeholder must be recognised as the slide title.
        let slide_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p:cSld><p:spTree><p:sp><p:nvSpPr><p:cNvPr id="1" name="Title"/><p:cNvSpPr/><p:nvPr><p:ph type="ctrTitle"/></p:nvPr></p:nvSpPr><p:txBody><a:p><a:r><a:t>Center Title</a:t></a:r></a:p></p:txBody></p:sp></p:spTree></p:cSld></p:sld>"#;

        let (shapes, _warnings) = parse_slide(slide_xml);
        assert_eq!(shapes.len(), 1);

        let only_shape = &shapes[0];
        let ShapeContent::Title(text) = only_shape else {
            panic!("expected Title, got {:?}", only_shape);
        };
        assert_eq!(text, "Center Title");
    }

    /// The document title is taken from the first slide's title.
    #[test]
    fn test_pptx_document_title_from_first_slide() {
        // Both slides carry nothing but a title.
        let title_only = |title: &'static str| TestSlide {
            title: Some(title),
            body_texts: vec![],
            notes: None,
            table: None,
            images: vec![],
            image_alt_texts: vec![],
        };
        let pptx = build_test_pptx(&[
            title_only("Presentation Title"),
            title_only("Second Slide"),
        ]);
        let converted = PptxConverter
            .convert(&pptx, &ConversionOptions::default())
            .unwrap();
        assert_eq!(converted.title.as_deref(), Some("Presentation Title"));
    }

    /// Several paragraphs in one body placeholder end up in a single body
    /// shape, separated by a newline.
    #[test]
    fn test_pptx_body_text_multiple_paragraphs() {
        // Body placeholder holding two <a:p> paragraphs.
        let two_paragraph_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p:cSld><p:spTree><p:sp><p:nvSpPr><p:cNvPr id="1" name="Content"/><p:cNvSpPr/><p:nvPr><p:ph type="body"/></p:nvPr></p:nvSpPr><p:txBody><a:p><a:r><a:t>First paragraph</a:t></a:r></a:p><a:p><a:r><a:t>Second paragraph</a:t></a:r></a:p></p:txBody></p:sp></p:spTree></p:cSld></p:sld>"#;

        let (parsed, _) = parse_slide(two_paragraph_xml);
        assert_eq!(parsed.len(), 1);
        let ShapeContent::Body(body) = &parsed[0] else {
            panic!("expected Body, got {:?}", parsed[0]);
        };
        assert!(body.contains("First paragraph"));
        assert!(body.contains("Second paragraph"));
        assert!(body.contains('\n'));
    }

    /// Adjacent runs inside one paragraph are concatenated with no separator.
    #[test]
    fn test_pptx_body_text_multiple_runs_joined() {
        let slide_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p:cSld><p:spTree><p:sp><p:nvSpPr><p:cNvPr id="1" name="Content"/><p:cNvSpPr/><p:nvPr><p:ph type="body"/></p:nvPr></p:nvSpPr><p:txBody><a:p><a:r><a:t>Hello </a:t></a:r><a:r><a:t>World</a:t></a:r></a:p></p:txBody></p:sp></p:spTree></p:cSld></p:sld>"#;

        let (shapes, _) = parse_slide(slide_xml);
        // `first()` rather than `shapes[0]`: if no shape was parsed the test
        // fails with the "expected Body" diagnostic instead of an
        // index-out-of-bounds panic.
        match shapes.first() {
            Some(ShapeContent::Body(text)) => assert_eq!(text, "Hello World"),
            other => panic!("expected Body, got {:?}", other),
        }
    }

    /// A `subTitle` placeholder is reported as body content, after the title.
    #[test]
    fn test_pptx_subtitle_treated_as_body() {
        let title_and_subtitle_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p:cSld><p:spTree><p:sp><p:nvSpPr><p:cNvPr id="1" name="Title"/><p:cNvSpPr/><p:nvPr><p:ph type="title"/></p:nvPr></p:nvSpPr><p:txBody><a:p><a:r><a:t>Main Title</a:t></a:r></a:p></p:txBody></p:sp><p:sp><p:nvSpPr><p:cNvPr id="2" name="Subtitle"/><p:cNvSpPr/><p:nvPr><p:ph type="subTitle"/></p:nvPr></p:nvSpPr><p:txBody><a:p><a:r><a:t>The subtitle</a:t></a:r></a:p></p:txBody></p:sp></p:spTree></p:cSld></p:sld>"#;

        let (parsed, _) = parse_slide(title_and_subtitle_xml);
        assert_eq!(parsed.len(), 2);

        let ShapeContent::Title(title) = &parsed[0] else {
            panic!("expected Title, got {:?}", parsed[0]);
        };
        assert_eq!(title, "Main Title");

        let ShapeContent::Body(subtitle) = &parsed[1] else {
            panic!("expected Body, got {:?}", parsed[1]);
        };
        assert_eq!(subtitle, "The subtitle");
    }

    /// A slide table is rendered as a Markdown pipe table with a header rule.
    #[test]
    fn test_pptx_table_basic() {
        let table = TestTable {
            headers: vec!["Name", "Value"],
            rows: vec![vec!["Alpha", "100"], vec!["Beta", "200"]],
        };
        let pptx = build_test_pptx(&[TestSlide {
            title: Some("Data"),
            body_texts: vec![],
            notes: None,
            table: Some(table),
            images: vec![],
            image_alt_texts: vec![],
        }]);
        let converted = PptxConverter
            .convert(&pptx, &ConversionOptions::default())
            .unwrap();
        let md = &converted.markdown;
        for expected_line in [
            "| Name | Value |",
            "|---|---|",
            "| Alpha | 100 |",
            "| Beta | 200 |",
        ] {
            assert!(md.contains(expected_line), "markdown was: {md}");
        }
    }

    /// Empty table cells are kept as empty columns in the Markdown row.
    #[test]
    fn test_pptx_table_empty_cells() {
        let sparse_table = TestTable {
            headers: vec!["A", "B", "C"],
            rows: vec![vec!["1", "", "3"]],
        };
        let pptx = build_test_pptx(&[TestSlide {
            title: None,
            body_texts: vec![],
            notes: None,
            table: Some(sparse_table),
            images: vec![],
            image_alt_texts: vec![],
        }]);
        let converted = PptxConverter
            .convert(&pptx, &ConversionOptions::default())
            .unwrap();
        let md = &converted.markdown;
        assert!(md.contains("| A | B | C |"));
        assert!(md.contains("| 1 |  | 3 |"));
    }

    /// Speaker notes are emitted as a `> Note:` blockquote.
    #[test]
    fn test_pptx_notes_basic() {
        let slide_with_note = TestSlide {
            title: Some("Slide"),
            body_texts: vec!["Content."],
            notes: Some("This is a speaker note."),
            table: None,
            images: vec![],
            image_alt_texts: vec![],
        };
        let pptx = build_test_pptx(&[slide_with_note]);
        let converted = PptxConverter
            .convert(&pptx, &ConversionOptions::default())
            .unwrap();
        assert!(
            converted
                .markdown
                .contains("> Note: This is a speaker note.")
        );
    }

    /// Every line of a multi-line speaker note stays inside the blockquote.
    #[test]
    fn test_pptx_notes_multiline() {
        let slide_with_note = TestSlide {
            title: Some("Slide"),
            body_texts: vec![],
            notes: Some("First line\nSecond line\nThird line"),
            table: None,
            images: vec![],
            image_alt_texts: vec![],
        };
        let pptx = build_test_pptx(&[slide_with_note]);
        let converted = PptxConverter
            .convert(&pptx, &ConversionOptions::default())
            .unwrap();
        let md = &converted.markdown;
        for quoted_line in ["> Note: First line", "> Second line", "> Third line"] {
            assert!(md.contains(quoted_line), "markdown was: {md}");
        }
    }

    /// A slide without notes produces no `> Note:` blockquote.
    #[test]
    fn test_pptx_notes_missing() {
        let slide_without_note = TestSlide {
            title: Some("Slide"),
            body_texts: vec!["Text."],
            notes: None,
            table: None,
            images: vec![],
            image_alt_texts: vec![],
        };
        let pptx = build_test_pptx(&[slide_without_note]);
        let converted = PptxConverter
            .convert(&pptx, &ConversionOptions::default())
            .unwrap();
        let has_note_block = converted.markdown.contains("> Note:");
        assert!(!has_note_block);
    }

    /// Korean, Chinese and Japanese text survives conversion unchanged.
    #[test]
    fn test_pptx_unicode_cjk() {
        let cjk_slide = TestSlide {
            title: Some("다국어"),
            body_texts: vec!["한국어 테스트", "中文测试", "日本語テスト"],
            notes: None,
            table: None,
            images: vec![],
            image_alt_texts: vec![],
        };
        let pptx = build_test_pptx(&[cjk_slide]);
        let converted = PptxConverter
            .convert(&pptx, &ConversionOptions::default())
            .unwrap();
        let md = &converted.markdown;
        for fragment in ["한국어 테스트", "中文测试", "日本語テスト", "다국어"] {
            assert!(md.contains(fragment), "markdown was: {md}");
        }
    }

    /// Emoji in body text survive conversion unchanged.
    #[test]
    fn test_pptx_emoji() {
        let emoji_slide = TestSlide {
            title: Some("Emoji Test"),
            body_texts: vec!["Rocket: 🚀 Stars: ✨ Earth: 🌍"],
            notes: None,
            table: None,
            images: vec![],
            image_alt_texts: vec![],
        };
        let pptx = build_test_pptx(&[emoji_slide]);
        let converted = PptxConverter
            .convert(&pptx, &ConversionOptions::default())
            .unwrap();
        let md = &converted.markdown;
        for emoji in ["🚀", "✨", "🌍"] {
            assert!(md.contains(emoji), "markdown was: {md}");
        }
    }

    /// An archive without `ppt/presentation.xml` is rejected with an error
    /// that names the missing part.
    #[test]
    fn test_pptx_missing_presentation_xml() {
        use std::io::Write;
        use zip::ZipWriter;
        use zip::write::SimpleFileOptions;

        let mut archive = ZipWriter::new(Cursor::new(Vec::new()));

        // The archive holds only a content-types part, no presentation.xml.
        let content_types = r#"<?xml version="1.0"?><Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"></Types>"#;
        archive
            .start_file("[Content_Types].xml", SimpleFileOptions::default())
            .unwrap();
        archive.write_all(content_types.as_bytes()).unwrap();

        let pptx = archive.finish().unwrap().into_inner();

        let outcome = PptxConverter.convert(&pptx, &ConversionOptions::default());
        assert!(outcome.is_err());
        let err = outcome.unwrap_err();
        assert!(
            err.to_string().contains("missing ppt/presentation.xml"),
            "error was: {err}"
        );
    }

    /// A slide referenced by the presentation but absent from the archive is
    /// skipped with a warning rather than failing the conversion.
    #[test]
    fn test_pptx_missing_slide_file_graceful() {
        use std::io::Write;
        use zip::ZipWriter;
        use zip::write::SimpleFileOptions;

        let content_types = r#"<?xml version="1.0"?><Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"></Types>"#;
        // presentation.xml points at rId1, which the rels map to a slide part
        // that is never written to the archive.
        let presentation = r#"<?xml version="1.0"?><p:presentation xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"><p:sldIdLst><p:sldId id="256" r:id="rId1"/></p:sldIdLst></p:presentation>"#;
        let presentation_rels = r#"<?xml version="1.0"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" Target="slides/slide1.xml"/></Relationships>"#;

        let mut archive = ZipWriter::new(Cursor::new(Vec::new()));
        let file_opts = SimpleFileOptions::default();
        for (entry_path, contents) in [
            ("[Content_Types].xml", content_types),
            ("ppt/presentation.xml", presentation),
            ("ppt/_rels/presentation.xml.rels", presentation_rels),
        ] {
            archive.start_file(entry_path, file_opts).unwrap();
            archive.write_all(contents.as_bytes()).unwrap();
        }
        let pptx = archive.finish().unwrap().into_inner();

        let converted = PptxConverter
            .convert(&pptx, &ConversionOptions::default())
            .unwrap();
        assert!(!converted.warnings.is_empty());
        let first_warning = &converted.warnings[0];
        assert_eq!(first_warning.code, WarningCode::SkippedElement);
        assert!(first_warning.message.contains("slide file not found"));
    }

    /// `derive_rels_path` places the `.rels` part in a sibling `_rels` folder.
    #[test]
    fn test_pptx_derive_rels_path() {
        // (part path, expected relationships path)
        let cases = [
            ("ppt/slides/slide1.xml", "ppt/slides/_rels/slide1.xml.rels"),
            ("ppt/presentation.xml", "ppt/_rels/presentation.xml.rels"),
            ("file.xml", "_rels/file.xml.rels"),
        ];
        for (part_path, expected_rels) in cases {
            assert_eq!(derive_rels_path(part_path), expected_rels);
        }
    }

    /// Relationship targets are resolved against the folder of the source part.
    #[test]
    fn test_pptx_resolve_relative_to_file() {
        let base_part = "ppt/slides/slide1.xml";
        // (relationship target, expected archive path)
        let cases = [
            ("../media/image1.png", "ppt/media/image1.png"),
            ("/ppt/media/image1.png", "ppt/media/image1.png"),
            (
                "../notesSlides/notesSlide1.xml",
                "ppt/notesSlides/notesSlide1.xml",
            ),
            ("chart1.xml", "ppt/slides/chart1.xml"),
        ];
        for (target, expected_path) in cases {
            assert_eq!(resolve_relative_to_file(base_part, target), expected_path);
        }
    }

    /// A `<p:pic>` yields an image shape carrying the blip's `r:embed` id.
    #[test]
    fn test_pptx_image_reference_detected() {
        let picture_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"><p:cSld><p:spTree><p:pic><p:nvPicPr><p:cNvPr id="1" name="Picture"/><p:cNvPicPr/><p:nvPr/></p:nvPicPr><p:blipFill><a:blip r:embed="rId2"/></p:blipFill></p:pic></p:spTree></p:cSld></p:sld>"#;

        let (parsed, _) = parse_slide(picture_xml);
        assert_eq!(parsed.len(), 1);
        let ShapeContent::Image { rel_id, .. } = &parsed[0] else {
            panic!("expected Image, got {:?}", parsed[0]);
        };
        assert_eq!(rel_id, "rId2");
    }

    /// The `descr` attribute of a picture's `<p:cNvPr>` becomes its alt text.
    #[test]
    fn test_pptx_image_alt_text_extracted() {
        let described_picture_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"><p:cSld><p:spTree><p:pic><p:nvPicPr><p:cNvPr id="10" descr="A beautiful chart" name="Picture"/><p:cNvPicPr/><p:nvPr/></p:nvPicPr><p:blipFill><a:blip r:embed="rId2"/></p:blipFill></p:pic></p:spTree></p:cSld></p:sld>"#;

        let (parsed, _) = parse_slide(described_picture_xml);
        assert_eq!(parsed.len(), 1);
        let ShapeContent::Image { rel_id, alt_text } = &parsed[0] else {
            panic!("expected Image, got {:?}", parsed[0]);
        };
        assert_eq!(rel_id, "rId2");
        assert_eq!(alt_text.as_deref(), Some("A beautiful chart"));
    }

    /// A picture without a `descr` attribute has no alt text.
    #[test]
    fn test_pptx_image_alt_text_missing() {
        let undescribed_picture_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"><p:cSld><p:spTree><p:pic><p:nvPicPr><p:cNvPr id="10" name="Picture"/><p:cNvPicPr/><p:nvPr/></p:nvPicPr><p:blipFill><a:blip r:embed="rId3"/></p:blipFill></p:pic></p:spTree></p:cSld></p:sld>"#;

        let (parsed, _) = parse_slide(undescribed_picture_xml);
        assert_eq!(parsed.len(), 1);
        let ShapeContent::Image { rel_id, alt_text } = &parsed[0] else {
            panic!("expected Image, got {:?}", parsed[0]);
        };
        assert_eq!(rel_id, "rId3");
        assert!(alt_text.is_none());
    }

    /// A picture's alt text appears in the Markdown image reference.
    #[test]
    fn test_pptx_image_alt_text_in_markdown() {
        let image_slide = TestSlide {
            title: Some("Images"),
            body_texts: vec![],
            notes: None,
            table: None,
            images: vec!["rIdImg1"],
            image_alt_texts: vec![Some("A diagram of the architecture")],
        };
        let pptx = build_test_pptx(&[image_slide]);
        let converted = PptxConverter
            .convert(&pptx, &ConversionOptions::default())
            .unwrap();
        let has_image_ref = converted
            .markdown
            .contains("![A diagram of the architecture](image1.png)");
        assert!(has_image_ref, "markdown was: {}", converted.markdown);
    }

    /// An `<a:br/>` between two text nodes becomes a newline in the body text.
    #[test]
    fn test_pptx_line_break() {
        let slide_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p:cSld><p:spTree><p:sp><p:nvSpPr><p:cNvPr id="1" name="Content"/><p:cNvSpPr/><p:nvPr><p:ph type="body"/></p:nvPr></p:nvSpPr><p:txBody><a:p><a:r><a:t>Line one</a:t><a:br/><a:t>Line two</a:t></a:r></a:p></p:txBody></p:sp></p:spTree></p:cSld></p:sld>"#;

        let (shapes, _) = parse_slide(slide_xml);
        // `first()` rather than `shapes[0]`: if no shape was parsed the test
        // fails with the "expected Body" diagnostic instead of an
        // index-out-of-bounds panic.
        match shapes.first() {
            Some(ShapeContent::Body(text)) => assert!(text.contains("Line one\nLine two")),
            other => panic!("expected Body, got {:?}", other),
        }
    }

    // ---- Image describer tests ----

    use crate::converter::ImageDescriber;
    use std::sync::Arc;

    /// Test double that answers every describe request with a fixed string.
    struct MockDescriber {
        /// Text returned from every `describe` call.
        description: String,
    }

    impl ImageDescriber for MockDescriber {
        /// Ignores all arguments and returns a copy of `self.description`.
        fn describe(
            &self,
            _image_bytes: &[u8],
            _mime_type: &str,
            _prompt: &str,
        ) -> Result<String, ConvertError> {
            let canned = self.description.to_owned();
            Ok(canned)
        }
    }

    /// Test double whose describe request always fails.
    struct FailingDescriber;

    impl ImageDescriber for FailingDescriber {
        /// Ignores all arguments and returns an `ImageDescriptionError`.
        fn describe(
            &self,
            _image_bytes: &[u8],
            _mime_type: &str,
            _prompt: &str,
        ) -> Result<String, ConvertError> {
            let reason = String::from("API error");
            Err(ConvertError::ImageDescriptionError { reason })
        }
    }

    /// Assemble an in-memory PPTX archive that also stores raw image payloads,
    /// for the image describer tests.
    ///
    /// Each entry of `image_data` is written to the archive as-is: the first
    /// element is the ZIP entry path, the second the bytes stored there.
    /// Image relationships are numbered `rIdImg1`, `rIdImg2`, … and point at
    /// `../media/image1.png`, `../media/image2.png`, … in slide order.
    fn build_test_pptx_with_image_data(
        slides: &[TestSlide],
        image_data: &[(&str, &[u8])], // (path in zip, data)
    ) -> Vec<u8> {
        use std::io::Write;
        use zip::ZipWriter;
        use zip::write::SimpleFileOptions;

        const XML_DECL: &str = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#;
        const RELS_OPEN: &str = r#"<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">"#;
        const RELS_CLOSE: &str = "</Relationships>";

        let mut archive = ZipWriter::new(Cursor::new(Vec::new()));
        let file_opts = SimpleFileOptions::default();

        // [Content_Types].xml
        let content_types = [
            XML_DECL,
            r#"<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">"#,
            r#"<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>"#,
            r#"<Default Extension="xml" ContentType="application/xml"/>"#,
            r#"<Default Extension="png" ContentType="image/png"/>"#,
            "</Types>",
        ]
        .concat();
        archive.start_file("[Content_Types].xml", file_opts).unwrap();
        archive.write_all(content_types.as_bytes()).unwrap();

        // presentation.xml and its rels are accumulated while slides are written.
        let mut presentation = format!(
            r#"{XML_DECL}<p:presentation xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"><p:sldIdLst>"#
        );
        let mut presentation_rels = format!("{XML_DECL}{RELS_OPEN}");

        for (index, slide) in slides.iter().enumerate() {
            let number = index + 1;
            let slide_rid = format!("rId{number}");
            let slide_id = 256 + index;

            presentation += &format!(r#"<p:sldId id="{slide_id}" r:id="{slide_rid}"/>"#);
            presentation_rels += &format!(
                r#"<Relationship Id="{slide_rid}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" Target="slides/slide{number}.xml"/>"#
            );

            // The slide part itself.
            archive
                .start_file(format!("ppt/slides/slide{number}.xml"), file_opts)
                .unwrap();
            archive
                .write_all(build_slide_xml(slide).as_bytes())
                .unwrap();

            // A slide rels part is only written when there is something to link.
            let has_notes = slide.notes.is_some();
            if has_notes || !slide.images.is_empty() {
                let mut rels = format!("{XML_DECL}{RELS_OPEN}");

                if has_notes {
                    rels += &format!(
                        r#"<Relationship Id="rIdNotes" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide" Target="../notesSlides/notesSlide{number}.xml"/>"#
                    );
                }

                for image_number in 1..=slide.images.len() {
                    rels += &format!(
                        r#"<Relationship Id="rIdImg{image_number}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="../media/image{image_number}.png"/>"#
                    );
                }

                rels += RELS_CLOSE;
                archive
                    .start_file(format!("ppt/slides/_rels/slide{number}.xml.rels"), file_opts)
                    .unwrap();
                archive.write_all(rels.as_bytes()).unwrap();
            }

            // The notes part, when the slide has speaker notes.
            if let Some(note_text) = slide.notes {
                archive
                    .start_file(format!("ppt/notesSlides/notesSlide{number}.xml"), file_opts)
                    .unwrap();
                archive
                    .write_all(build_notes_xml(note_text).as_bytes())
                    .unwrap();
            }
        }

        presentation += "</p:sldIdLst></p:presentation>";
        presentation_rels += RELS_CLOSE;

        archive.start_file("ppt/presentation.xml", file_opts).unwrap();
        archive.write_all(presentation.as_bytes()).unwrap();

        archive
            .start_file("ppt/_rels/presentation.xml.rels", file_opts)
            .unwrap();
        archive.write_all(presentation_rels.as_bytes()).unwrap();

        // Raw image payloads, stored exactly where the caller asked.
        for &(entry_path, payload) in image_data {
            archive.start_file(entry_path.to_string(), file_opts).unwrap();
            archive.write_all(payload).unwrap();
        }

        archive.finish().unwrap().into_inner()
    }

    /// Two pictures on one slide are both rendered, in slide order.
    #[test]
    fn test_pptx_multiple_images_on_one_slide() {
        let gallery = TestSlide {
            title: Some("Gallery"),
            body_texts: vec![],
            notes: None,
            table: None,
            images: vec!["rIdImg1", "rIdImg2"],
            image_alt_texts: vec![Some("First image"), Some("Second image")],
        };
        let pptx = build_test_pptx_with_image_data(
            &[gallery],
            &[
                ("ppt/media/image1.png", b"fake-png-1"),
                ("ppt/media/image2.png", b"fake-png-2"),
            ],
        );

        let converted = PptxConverter
            .convert(&pptx, &ConversionOptions::default())
            .unwrap();
        let md = &converted.markdown;
        assert!(
            md.contains("![First image](image1.png)"),
            "markdown was: {}",
            md
        );
        assert!(
            md.contains("![Second image](image2.png)"),
            "markdown was: {}",
            md
        );
        // The first picture must be emitted ahead of the second.
        let first_at = md.find("image1.png").unwrap();
        let second_at = md.find("image2.png").unwrap();
        assert!(first_at < second_at, "image1 should appear before image2");
    }

    /// With a describer configured, its output is used as the image alt text.
    #[test]
    fn test_pptx_image_describer_replaces_alt_text() {
        let slide = TestSlide {
            title: Some("Slide with Image"),
            body_texts: vec![],
            notes: None,
            table: None,
            images: vec!["rIdImg1"],
            image_alt_texts: vec![None],
        };
        let pptx =
            build_test_pptx_with_image_data(&[slide], &[("ppt/media/image1.png", b"fake-png-data")]);

        let describer = MockDescriber {
            description: "A diagram showing data flow".to_string(),
        };
        let options = ConversionOptions {
            image_describer: Some(Arc::new(describer)),
            ..Default::default()
        };
        let converted = PptxConverter.convert(&pptx, &options).unwrap();
        let has_described_ref = converted
            .markdown
            .contains("![A diagram showing data flow](image1.png)");
        assert!(has_described_ref, "markdown was: {}", converted.markdown);
        assert!(converted.images.is_empty());
    }

    /// When the describer fails, the original alt text is kept and a warning
    /// is recorded.
    #[test]
    fn test_pptx_image_describer_error_keeps_original_alt() {
        let slide = TestSlide {
            title: Some("Slide"),
            body_texts: vec![],
            notes: None,
            table: None,
            images: vec!["rIdImg1"],
            image_alt_texts: vec![Some("Original description")],
        };
        let pptx =
            build_test_pptx_with_image_data(&[slide], &[("ppt/media/image1.png", b"fake-png-data")]);

        let options = ConversionOptions {
            image_describer: Some(Arc::new(FailingDescriber)),
            ..Default::default()
        };
        let converted = PptxConverter.convert(&pptx, &options).unwrap();

        let kept_original_alt = converted
            .markdown
            .contains("![Original description](image1.png)");
        assert!(kept_original_alt, "markdown was: {}", converted.markdown);

        let warned_about_failure = converted.warnings.iter().any(|warning| {
            warning.code == WarningCode::SkippedElement
                && warning.message.contains("image description failed")
        });
        assert!(warned_about_failure);
    }

    // ---- Group shape tests ----

    /// Text of a single `<p:sp>` nested in a `<p:grpSp>` is extracted.
    #[test]
    fn test_pptx_group_shape_text_extracted() {
        let grouped_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p:cSld><p:spTree><p:grpSp><p:nvGrpSpPr><p:cNvPr id="10" name="Group 1"/><p:cNvGrpSpPr/><p:nvPr/></p:nvGrpSpPr><p:grpSpPr/><p:sp><p:nvSpPr><p:cNvPr id="11" name="TextBox"/><p:cNvSpPr/><p:nvPr/></p:nvSpPr><p:txBody><a:p><a:r><a:t>Group text</a:t></a:r></a:p></p:txBody></p:sp></p:grpSp></p:spTree></p:cSld></p:sld>"#;

        let (parsed, warnings) = parse_slide(grouped_xml);
        assert!(warnings.is_empty());
        assert_eq!(parsed.len(), 1);
        let ShapeContent::Body(body) = &parsed[0] else {
            panic!("expected Body, got {:?}", parsed[0]);
        };
        assert_eq!(body, "Group text");
    }

    /// Two `<p:sp>` children of one `<p:grpSp>` are both extracted, in order.
    #[test]
    fn test_pptx_group_shape_multiple_children() {
        let grouped_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p:cSld><p:spTree><p:grpSp><p:nvGrpSpPr><p:cNvPr id="10" name="Group"/><p:cNvGrpSpPr/><p:nvPr/></p:nvGrpSpPr><p:grpSpPr/><p:sp><p:nvSpPr><p:cNvPr id="11" name="Shape1"/><p:cNvSpPr/><p:nvPr/></p:nvSpPr><p:txBody><a:p><a:r><a:t>First shape</a:t></a:r></a:p></p:txBody></p:sp><p:sp><p:nvSpPr><p:cNvPr id="12" name="Shape2"/><p:cNvSpPr/><p:nvPr/></p:nvSpPr><p:txBody><a:p><a:r><a:t>Second shape</a:t></a:r></a:p></p:txBody></p:sp></p:grpSp></p:spTree></p:cSld></p:sld>"#;

        let (parsed, warnings) = parse_slide(grouped_xml);
        assert!(warnings.is_empty());
        assert_eq!(parsed.len(), 2);

        let ShapeContent::Body(first) = &parsed[0] else {
            panic!("expected Body, got {:?}", parsed[0]);
        };
        assert_eq!(first, "First shape");

        let ShapeContent::Body(second) = &parsed[1] else {
            panic!("expected Body, got {:?}", parsed[1]);
        };
        assert_eq!(second, "Second shape");
    }

    /// Text of a `<p:sp>` inside a group nested in another group is extracted.
    #[test]
    fn test_pptx_group_shape_nested() {
        let nested_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p:cSld><p:spTree><p:grpSp><p:nvGrpSpPr><p:cNvPr id="10" name="Outer"/><p:cNvGrpSpPr/><p:nvPr/></p:nvGrpSpPr><p:grpSpPr/><p:grpSp><p:nvGrpSpPr><p:cNvPr id="11" name="Inner"/><p:cNvGrpSpPr/><p:nvPr/></p:nvGrpSpPr><p:grpSpPr/><p:sp><p:nvSpPr><p:cNvPr id="12" name="Deep"/><p:cNvSpPr/><p:nvPr/></p:nvSpPr><p:txBody><a:p><a:r><a:t>Nested group text</a:t></a:r></a:p></p:txBody></p:sp></p:grpSp></p:grpSp></p:spTree></p:cSld></p:sld>"#;

        let (parsed, warnings) = parse_slide(nested_xml);
        assert!(warnings.is_empty());
        assert_eq!(parsed.len(), 1);
        let ShapeContent::Body(body) = &parsed[0] else {
            panic!("expected Body, got {:?}", parsed[0]);
        };
        assert_eq!(body, "Nested group text");
    }

    /// A table `<p:graphicFrame>` inside a `<p:grpSp>` is extracted as a table.
    #[test]
    fn test_pptx_group_shape_with_table() {
        let grouped_table_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p:cSld><p:spTree><p:grpSp><p:nvGrpSpPr><p:cNvPr id="10" name="Group"/><p:cNvGrpSpPr/><p:nvPr/></p:nvGrpSpPr><p:grpSpPr/><p:graphicFrame><p:nvGraphicFramePr><p:cNvPr id="11" name="Table"/><p:cNvGraphicFramePr/><p:nvPr/></p:nvGraphicFramePr><a:graphic><a:graphicData><a:tbl><a:tr><a:tc><a:txBody><a:p><a:r><a:t>H1</a:t></a:r></a:p></a:txBody></a:tc><a:tc><a:txBody><a:p><a:r><a:t>H2</a:t></a:r></a:p></a:txBody></a:tc></a:tr><a:tr><a:tc><a:txBody><a:p><a:r><a:t>A</a:t></a:r></a:p></a:txBody></a:tc><a:tc><a:txBody><a:p><a:r><a:t>B</a:t></a:r></a:p></a:txBody></a:tc></a:tr></a:tbl></a:graphicData></a:graphic></p:graphicFrame></p:grpSp></p:spTree></p:cSld></p:sld>"#;

        let (parsed, warnings) = parse_slide(grouped_table_xml);
        assert!(warnings.is_empty());
        assert_eq!(parsed.len(), 1);
        let ShapeContent::Table { headers, rows } = &parsed[0] else {
            panic!("expected Table, got {:?}", parsed[0]);
        };
        assert_eq!(headers, &["H1", "H2"]);
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0], &["A", "B"]);
    }

    /// A `<p:pic>` inside a `<p:grpSp>` is extracted with its id and alt text.
    #[test]
    fn test_pptx_group_shape_with_image() {
        let grouped_picture_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"><p:cSld><p:spTree><p:grpSp><p:nvGrpSpPr><p:cNvPr id="10" name="Group"/><p:cNvGrpSpPr/><p:nvPr/></p:nvGrpSpPr><p:grpSpPr/><p:pic><p:nvPicPr><p:cNvPr id="11" descr="Alt text" name="Picture"/><p:cNvPicPr/><p:nvPr/></p:nvPicPr><p:blipFill><a:blip r:embed="rIdImg1"/></p:blipFill></p:pic></p:grpSp></p:spTree></p:cSld></p:sld>"#;

        let (parsed, warnings) = parse_slide(grouped_picture_xml);
        assert!(warnings.is_empty());
        assert_eq!(parsed.len(), 1);
        let ShapeContent::Image { rel_id, alt_text } = &parsed[0] else {
            panic!("expected Image, got {:?}", parsed[0]);
        };
        assert_eq!(rel_id, "rIdImg1");
        assert_eq!(alt_text.as_deref(), Some("Alt text"));
    }

    /// A `<p:grpSp>` with no child shapes yields no shapes and no warnings.
    #[test]
    fn test_pptx_group_shape_empty() {
        let empty_group_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p:cSld><p:spTree><p:grpSp><p:nvGrpSpPr><p:cNvPr id="10" name="Empty Group"/><p:cNvGrpSpPr/><p:nvPr/></p:nvGrpSpPr><p:grpSpPr/></p:grpSp></p:spTree></p:cSld></p:sld>"#;

        let (parsed, slide_warnings) = parse_slide(empty_group_xml);
        assert!(slide_warnings.is_empty());
        assert!(parsed.is_empty());
    }

    // ---- Comment extraction tests ----

    // -- unit: author registries --

    #[test]
    fn test_parse_comment_authors_legacy() {
        // Legacy registry: <p:cmAuthor> entries keyed by their numeric `id`.
        let registry_xml = r#"<?xml version="1.0"?><p:cmAuthorLst xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"><p:cmAuthor id="0" name="Julie Lee" initials="JL"/><p:cmAuthor id="1" name="Sam Park" initials="SP"/></p:cmAuthorLst>"#;
        let by_id = parse_author_registry(registry_xml, "cmAuthor");

        // Each id must resolve to the matching `name` attribute.
        for (id, expected_name) in [("0", "Julie Lee"), ("1", "Sam Park")] {
            assert_eq!(by_id.get(id).map(|name| name.as_str()), Some(expected_name));
        }
    }

    #[test]
    fn test_parse_authors_modern() {
        // Modern registry: <p188:author> entries keyed by a brace-wrapped id.
        let registry_xml = r#"<?xml version="1.0"?><p188:authorLst xmlns:p188="http://schemas.microsoft.com/office/powerpoint/2018/8/main"><p188:author id="{GUID-1}" name="Julie Lee" initials="JL" userId="u1" providerId="AD"/></p188:authorLst>"#;
        let by_id = parse_author_registry(registry_xml, "author");

        let resolved = by_id.get("{GUID-1}").map(|name| name.as_str());
        assert_eq!(resolved, Some("Julie Lee"));
    }

    // -- unit: comment files --

    #[test]
    fn test_parse_legacy_comments() {
        // One known author (id "0") and one top-level legacy comment by them.
        let authors: HashMap<String, String> =
            HashMap::from([("0".to_string(), "Julie Lee".to_string())]);
        let comment_xml = r#"<?xml version="1.0"?><p:cmLst xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"><p:cm authorId="0" dt="2024-01-15T10:30:00.000" idx="1"><p:pos x="10" y="10"/><p:text>Add a diagram here.</p:text></p:cm></p:cmLst>"#;

        let raw = parse_legacy_comments(comment_xml, &authors);
        assert_eq!(raw.len(), 1);

        // Author is resolved through the registry; date and body are verbatim.
        let only = &raw[0];
        assert_eq!(only.author, "Julie Lee");
        assert_eq!(only.date, "2024-01-15T10:30:00.000");
        assert_eq!(only.body, "Add a diagram here.");
        assert!(!only.is_reply);
    }

    #[test]
    fn test_parse_legacy_comments_unknown_author_empty() {
        // The registry is empty, so authorId="7" cannot be resolved. The parser
        // is expected to keep the comment and leave the author blank.
        let authors = HashMap::new();
        let xml = r#"<?xml version="1.0"?><p:cmLst xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"><p:cm authorId="7" dt="2024-01-15T10:30:00.000" idx="1"><p:text>Orphan author.</p:text></p:cm></p:cmLst>"#;
        let raw = parse_legacy_comments(xml, &authors);
        // Pin the count before indexing: a dropped comment then fails with a
        // readable assertion instead of an out-of-bounds panic, and a spurious
        // extra comment is caught too (matches the sibling comment tests).
        assert_eq!(raw.len(), 1);
        assert_eq!(raw[0].author, ""); // resolved to Unknown downstream
        assert_eq!(raw[0].body, "Orphan author.");
    }

    #[test]
    fn test_parse_modern_comments_with_reply() {
        // Two registered authors: Alice writes the thread, Bob replies.
        let authors: HashMap<String, String> = [("{A1}", "Alice"), ("{A2}", "Bob")]
            .into_iter()
            .map(|(id, name)| (id.to_string(), name.to_string()))
            .collect();
        // Note the <p188:replyLst> appears *before* the parent's own txBody.
        let thread_xml = r#"<?xml version="1.0"?><p188:cmLst xmlns:p188="http://schemas.microsoft.com/office/powerpoint/2018/8/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p188:cm id="{C1}" authorId="{A1}" created="2024-01-15T10:30:00.000" status="active"><p188:replyLst><p188:reply id="{C2}" authorId="{A2}" created="2024-01-15T11:00:00.000" status="active"><p188:txBody><a:bodyPr/><a:p><a:r><a:t>Agreed, will fix.</a:t></a:r></a:p></p188:txBody></p188:reply></p188:replyLst><p188:txBody><a:bodyPr/><a:p><a:r><a:t>Needs a clearer title.</a:t></a:r></a:p></p188:txBody></p188:cm></p188:cmLst>"#;

        let raw = parse_modern_comments(thread_xml, &authors);
        assert_eq!(raw.len(), 2);

        // The order of the returned entries is deliberately not asserted here;
        // each entry is located by its reply flag instead.
        let (replies, top_level): (Vec<_>, Vec<_>) = raw.iter().partition(|c| c.is_reply);
        let reply = replies.first().expect("a reply");
        let top = top_level.first().expect("a top-level");

        assert_eq!(reply.author, "Bob");
        assert_eq!(reply.body, "Agreed, will fix.");
        assert_eq!(top.author, "Alice");
        assert_eq!(top.body, "Needs a clearer title.");
    }

    #[test]
    fn test_slide_label() {
        // (slide number, optional title, expected label)
        let cases = [
            // A real title is appended after the slide number.
            (2, Some("Quarterly Results"), "Slide 2: Quarterly Results"),
            // No title: just the slide number.
            (2, None, "Slide 2"),
            // A whitespace-only title is treated like no title.
            (3, Some("   "), "Slide 3"),
        ];
        for (number, title, expected) in cases {
            assert_eq!(slide_label(number, title), expected);
        }
    }

    #[test]
    fn test_slide_label_collapses_newline_in_title() {
        // Regression (review finding 2): the label is rendered inside a
        // single-line `- **source**:` list item, so a line break in the title
        // must be flattened to a space rather than passed through.
        let label = slide_label(1, Some("Line1\nLine2"));
        let has_line_break = label.contains('\n');
        assert!(!has_line_break, "label has a newline: {label:?}");
        assert_eq!(label, "Slide 1: Line1 Line2");
    }

    #[test]
    fn test_parse_legacy_comments_self_closing_text_no_leak() {
        // Regression (review finding 5): an empty-element <p:text/> has no
        // closing tag, so it must produce an empty body instead of capturing
        // text that follows it.
        let comment_xml = r#"<?xml version="1.0"?><p:cmLst xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"><p:cm authorId="0" dt="d"><p:text/></p:cm></p:cmLst>"#;
        let raw = parse_legacy_comments(comment_xml, &HashMap::new());

        assert_eq!(raw.len(), 1);
        let only = &raw[0];
        assert_eq!(only.body, "");
    }

    #[test]
    fn test_parse_modern_comments_self_closing_text_no_leak() {
        // Regression (review finding 5): the first run holds an empty-element
        // <a:t/>, the second a normal <a:t>real</a:t>. Only the text of the
        // second run may end up in the body.
        let comment_xml = r#"<?xml version="1.0"?><p188:cmLst xmlns:p188="http://schemas.microsoft.com/office/powerpoint/2018/8/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p188:cm authorId="a" created="c"><p188:txBody><a:p><a:r><a:t/></a:r><a:r><a:t>real</a:t></a:r></a:p></p188:txBody></p188:cm></p188:cmLst>"#;
        let raw = parse_modern_comments(comment_xml, &HashMap::new());

        assert_eq!(raw.len(), 1);
        let only = &raw[0];
        assert_eq!(only.body, "real");
    }

    #[test]
    fn test_parse_author_registry_unescapes_name() {
        // Regression (review finding 8): `&amp;` in the `name` attribute must be
        // decoded to a literal `&` in the registry value.
        let registry_xml = r#"<?xml version="1.0"?><p:cmAuthorLst xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"><p:cmAuthor id="0" name="Ben &amp; Jerry"/></p:cmAuthorLst>"#;
        let by_id = parse_author_registry(registry_xml, "cmAuthor");

        let resolved = by_id.get("0").map(|name| name.as_str());
        assert_eq!(resolved, Some("Ben & Jerry"));
    }

    // -- integration: build a PPTX with comment parts --

    /// Assemble an in-memory, single-slide PPTX archive that carries one comment part.
    ///
    /// * `modern` - when `true`, the slide's relationship uses the 2018/10
    ///   comments relationship type and the archive gets `ppt/authors.xml` plus
    ///   `ppt/comments/modernComment_x.xml` (a comment with one reply); when
    ///   `false`, it uses the 2006 comments relationship type with
    ///   `ppt/commentAuthors.xml` plus `ppt/comments/comment1.xml`.
    /// * `slide_title` - text for a title placeholder shape; `None` leaves the
    ///   slide's shape tree empty. The text is inserted into the slide XML as-is.
    ///
    /// Returns the raw bytes of the finished ZIP archive. Panics if any archive
    /// write fails.
    fn build_pptx_with_comment(modern: bool, slide_title: Option<&str>) -> Vec<u8> {
        use std::io::Write;
        use zip::ZipWriter;
        use zip::write::SimpleFileOptions;

        let mut zip = ZipWriter::new(Cursor::new(Vec::new()));
        let opts = SimpleFileOptions::default();
        // Appends one entry to the archive; entries land in call order.
        let mut put = |name: &str, bytes: &[u8]| {
            zip.start_file(name, opts).unwrap();
            zip.write_all(bytes).unwrap();
        };

        put(
            "[Content_Types].xml",
            br#"<?xml version="1.0"?><Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"><Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/><Default Extension="xml" ContentType="application/xml"/></Types>"#,
        );

        // Presentation part and its relationships: a single slide, rId1.
        put(
            "ppt/presentation.xml",
            br#"<?xml version="1.0"?><p:presentation xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"><p:sldIdLst><p:sldId id="256" r:id="rId1"/></p:sldIdLst></p:presentation>"#,
        );
        put(
            "ppt/_rels/presentation.xml.rels",
            br#"<?xml version="1.0"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" Target="slides/slide1.xml"/></Relationships>"#,
        );

        // The slide itself: an optional title placeholder inside the shape tree.
        let title_shape = slide_title
            .map(|t| {
                format!(
                    r#"<p:sp><p:nvSpPr><p:cNvPr id="1" name="Title"/><p:cNvSpPr/><p:nvPr><p:ph type="title"/></p:nvPr></p:nvSpPr><p:txBody><a:p><a:r><a:t>{t}</a:t></a:r></a:p></p:txBody></p:sp>"#
                )
            })
            .unwrap_or_default();
        let slide = format!(
            r#"<?xml version="1.0"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p:cSld><p:spTree>{title_shape}</p:spTree></p:cSld></p:sld>"#
        );
        put("ppt/slides/slide1.xml", slide.as_bytes());

        // Slide relationships point at the comment part for the chosen scheme.
        let rel_type = if modern {
            "http://schemas.microsoft.com/office/2018/10/relationships/comments"
        } else {
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"
        };
        let comment_path = if modern {
            "../comments/modernComment_x.xml"
        } else {
            "../comments/comment1.xml"
        };
        let slide_rels = format!(
            r#"<?xml version="1.0"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rIdC" Type="{rel_type}" Target="{comment_path}"/></Relationships>"#
        );
        put("ppt/slides/_rels/slide1.xml.rels", slide_rels.as_bytes());

        // Author registry followed by the comment part itself.
        if modern {
            put(
                "ppt/authors.xml",
                br#"<?xml version="1.0"?><p188:authorLst xmlns:p188="http://schemas.microsoft.com/office/powerpoint/2018/8/main"><p188:author id="{A1}" name="Alice" userId="u" providerId="AD"/><p188:author id="{A2}" name="Bob" userId="u" providerId="AD"/></p188:authorLst>"#,
            );
            put(
                "ppt/comments/modernComment_x.xml",
                br#"<?xml version="1.0"?><p188:cmLst xmlns:p188="http://schemas.microsoft.com/office/powerpoint/2018/8/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p188:cm id="{C1}" authorId="{A1}" created="2024-01-15T10:30:00.000"><p188:replyLst><p188:reply id="{C2}" authorId="{A2}" created="2024-01-15T11:00:00.000"><p188:txBody><a:bodyPr/><a:p><a:r><a:t>Agreed.</a:t></a:r></a:p></p188:txBody></p188:reply></p188:replyLst><p188:txBody><a:bodyPr/><a:p><a:r><a:t>Please clarify.</a:t></a:r></a:p></p188:txBody></p188:cm></p188:cmLst>"#,
            );
        } else {
            put(
                "ppt/commentAuthors.xml",
                br#"<?xml version="1.0"?><p:cmAuthorLst xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"><p:cmAuthor id="0" name="Julie Lee" initials="JL"/></p:cmAuthorLst>"#,
            );
            put(
                "ppt/comments/comment1.xml",
                br#"<?xml version="1.0"?><p:cmLst xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"><p:cm authorId="0" dt="2024-01-15T10:30:00.000" idx="1"><p:pos x="10" y="10"/><p:text>Add a diagram here.</p:text></p:cm></p:cmLst>"#,
            );
        }

        zip.finish().unwrap().into_inner()
    }

    #[test]
    fn test_pptx_legacy_comments_end_to_end() {
        // Full conversion of a legacy-comment deck titled "Overview", with
        // comment extraction switched on.
        let options = ConversionOptions {
            extract_comments: true,
            ..Default::default()
        };
        let pptx = build_pptx_with_comment(false, Some("Overview"));
        let result = PptxConverter.convert(&pptx, &options).unwrap();
        let md = &result.markdown;

        // The comments section must exist at all...
        assert!(md.contains("# Comments"), "md: {}", md);
        // ...and contain the numbered entry with author/date, body and source.
        for fragment in [
            "## 1",
            "- **author**: Julie Lee (2024-01-15T10:30:00.000)",
            "- **comment**: Add a diagram here.",
            "- **source**: Slide 1: Overview",
        ] {
            assert!(md.contains(fragment));
        }
    }

    #[test]
    fn test_pptx_modern_comments_with_reply_end_to_end() {
        // Full conversion of a modern-comment deck (one comment plus one reply)
        // whose slide has no title shape.
        let options = ConversionOptions {
            extract_comments: true,
            ..Default::default()
        };
        let pptx = build_pptx_with_comment(true, None);
        let result = PptxConverter.convert(&pptx, &options).unwrap();
        let md = &result.markdown;

        assert!(md.contains("# Comments"), "md: {}", md);
        assert!(md.contains("- **comment**: Please clarify."));
        assert!(
            md.contains("- **comment**: (reply) Agreed."),
            "reply not marked, md: {}",
            md
        );

        // Document order: the parent comment has to appear ahead of its reply.
        let parent_at = md.find("Please clarify.").unwrap();
        let reply_at = md.find("(reply) Agreed.").unwrap();
        assert!(
            parent_at < reply_at,
            "parent must precede reply, md: {}",
            md
        );

        // With no slide title the source line is the bare "Slide 1".
        assert!(md.contains("- **source**: Slide 1\n"));
    }

    #[test]
    fn test_pptx_comments_absent_when_flag_off() {
        // With default options the comment part in the archive must not produce
        // a comments section in the Markdown.
        let defaults = ConversionOptions::default();
        let pptx = build_pptx_with_comment(false, Some("Overview"));
        let result = PptxConverter.convert(&pptx, &defaults).unwrap();

        let has_comments_section = result.markdown.contains("# Comments");
        assert!(!has_comments_section);
    }

    #[test]
    fn test_pptx_comments_plain_text_stripped() {
        // The plain-text rendering keeps the comment content but drops the
        // Markdown heading marker and bold markers.
        let options = ConversionOptions {
            extract_comments: true,
            ..Default::default()
        };
        let pptx = build_pptx_with_comment(false, Some("Overview"));
        let result = PptxConverter.convert(&pptx, &options).unwrap();
        let text = &result.plain_text;

        // Content that must survive.
        for present in [
            "Comments\n",
            "author: Julie Lee (2024-01-15T10:30:00.000)",
            "source: Slide 1: Overview",
        ] {
            assert!(text.contains(present));
        }
        // Markdown syntax that must be gone.
        for absent in ["# Comments", "**"] {
            assert!(!text.contains(absent));
        }
    }

    #[test]
    fn test_pptx_dual_scheme_no_double_report() {
        // Regression (review finding 2): when one slide links BOTH a legacy and a
        // modern comment part holding the same thread (kept for back-compat), the
        // comment must show up once in the output, not twice.
        use std::io::Write;
        use zip::ZipWriter;
        use zip::write::SimpleFileOptions;

        // Archive entries in write order: (path, contents).
        let parts: [(&str, &[u8]); 9] = [
            (
                "[Content_Types].xml",
                br#"<?xml version="1.0"?><Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"><Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/><Default Extension="xml" ContentType="application/xml"/></Types>"#,
            ),
            (
                "ppt/presentation.xml",
                br#"<?xml version="1.0"?><p:presentation xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"><p:sldIdLst><p:sldId id="256" r:id="rId1"/></p:sldIdLst></p:presentation>"#,
            ),
            (
                "ppt/_rels/presentation.xml.rels",
                br#"<?xml version="1.0"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" Target="slides/slide1.xml"/></Relationships>"#,
            ),
            (
                "ppt/slides/slide1.xml",
                br#"<?xml version="1.0"?><p:sld xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p:cSld><p:spTree/></p:cSld></p:sld>"#,
            ),
            // The slide's relationships reference the legacy AND the modern part.
            (
                "ppt/slides/_rels/slide1.xml.rels",
                br#"<?xml version="1.0"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rIdL" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="../comments/comment1.xml"/><Relationship Id="rIdM" Type="http://schemas.microsoft.com/office/2018/10/relationships/comments" Target="../comments/modernComment_x.xml"/></Relationships>"#,
            ),
            (
                "ppt/commentAuthors.xml",
                br#"<?xml version="1.0"?><p:cmAuthorLst xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"><p:cmAuthor id="0" name="Dana"/></p:cmAuthorLst>"#,
            ),
            (
                "ppt/comments/comment1.xml",
                br#"<?xml version="1.0"?><p:cmLst xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"><p:cm authorId="0" dt="d" idx="1"><p:text>Shared thread.</p:text></p:cm></p:cmLst>"#,
            ),
            (
                "ppt/authors.xml",
                br#"<?xml version="1.0"?><p188:authorLst xmlns:p188="http://schemas.microsoft.com/office/powerpoint/2018/8/main"><p188:author id="{A}" name="Dana" userId="u" providerId="AD"/></p188:authorLst>"#,
            ),
            (
                "ppt/comments/modernComment_x.xml",
                br#"<?xml version="1.0"?><p188:cmLst xmlns:p188="http://schemas.microsoft.com/office/powerpoint/2018/8/main" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><p188:cm authorId="{A}" created="c"><p188:txBody><a:p><a:r><a:t>Shared thread.</a:t></a:r></a:p></p188:txBody></p188:cm></p188:cmLst>"#,
            ),
        ];

        let mut zip = ZipWriter::new(Cursor::new(Vec::new()));
        let opts = SimpleFileOptions::default();
        for (path, bytes) in parts {
            zip.start_file(path, opts).unwrap();
            zip.write_all(bytes).unwrap();
        }
        let data = zip.finish().unwrap().into_inner();

        let options = ConversionOptions {
            extract_comments: true,
            ..Default::default()
        };
        let result = PptxConverter.convert(&data, &options).unwrap();
        let md = &result.markdown;

        // The shared thread is reported exactly once...
        let hits = md.matches("- **comment**: Shared thread.").count();
        assert_eq!(hits, 1, "comment double-reported, md: {}", md);
        // ...so there is an entry 1 and no entry 2.
        assert!(md.contains("## 1"));
        assert!(!md.contains("## 2"));
    }
}