elektromail 0.1.1

//! MIME structure parsing for IMAP BODYSTRUCTURE and section fetching.

use std::collections::HashMap;

/// Represents a parsed MIME structure.
///
/// Values of this type are produced by [`parse_mime`] and consumed by
/// [`format_bodystructure`] and [`extract_section`]. All offsets stored in the
/// leaf variants are relative to the start of the buffer that was handed to
/// [`parse_mime`].
#[derive(Debug, Clone)]
// NOTE(review): presumably some fields are only read by callers outside this
// file (or not yet at all) — confirm before removing this allow.
#[allow(dead_code)]
pub enum MimePart {
    /// A text/* part (text/plain, text/html, etc.)
    Text {
        /// Subtype taken from `Content-Type` (lower-cased by the parser).
        subtype: String,
        /// Value of the `charset` parameter, if the header carried one.
        charset: Option<String>,
        /// `Content-Transfer-Encoding`, upper-cased; the parser uses `"7BIT"`
        /// when the header is absent.
        encoding: String,
        /// Body length in bytes.
        size: usize,
        /// Number of `\n` bytes in the body.
        lines: usize,
        /// Offset of the first body byte within the parsed buffer.
        body_offset: usize,
        /// Body length in bytes (the parser sets this to the same value as `size`).
        body_len: usize,
    },
    /// A multipart/* container (multipart/mixed, multipart/alternative, etc.)
    Multipart {
        /// Subtype taken from `Content-Type` (lower-cased by the parser).
        subtype: String,
        /// Value of the `boundary` parameter, without the leading `--`.
        boundary: String,
        /// Child parts in the order they appear in the body.
        parts: Vec<MimePart>,
    },
    /// Any other content type (application/*, image/*, etc.)
    Other {
        /// Top-level media type (lower-cased by the parser).
        media_type: String,
        /// Subtype taken from `Content-Type` (lower-cased by the parser).
        subtype: String,
        /// `Content-Transfer-Encoding`, upper-cased; the parser uses `"7BIT"`
        /// when the header is absent.
        encoding: String,
        /// Body length in bytes.
        size: usize,
        /// Offset of the first body byte within the parsed buffer.
        body_offset: usize,
        /// Body length in bytes (the parser sets this to the same value as `size`).
        body_len: usize,
    },
}

/// Parse a `Content-Type` header value into `(media_type, subtype, parameters)`.
///
/// * The media type and subtype are trimmed and lower-cased. If the value has
///   no `/`, `("text", "plain")` is used.
/// * Parameter names are lower-cased; parameter values keep their case and
///   have surrounding double quotes removed.
/// * Parameters are separated by `;`, but a `;` inside a double-quoted value
///   (e.g. `boundary="a;b"`) is part of the value, as RFC 2045 quoted-strings
///   allow.
/// * Segments without `=` are ignored. If a parameter name repeats, the last
///   occurrence wins.
fn parse_content_type(header: &str) -> (String, String, HashMap<String, String>) {
    let mut segments = split_outside_quotes(header.trim()).into_iter();

    // The first segment is always present (possibly empty): it is the type itself.
    let type_part = segments.next().unwrap_or("text/plain").trim();
    let (media_type, subtype) = match type_part.split_once('/') {
        Some((t, s)) => (t.trim().to_lowercase(), s.trim().to_lowercase()),
        None => ("text".to_string(), "plain".to_string()),
    };

    // Remaining segments are `name=value` parameters.
    let params = segments
        .filter_map(|segment| segment.trim().split_once('='))
        .map(|(key, value)| {
            (
                key.trim().to_lowercase(),
                value.trim().trim_matches('"').to_string(),
            )
        })
        .collect();

    (media_type, subtype, params)
}

/// Split `input` on `;` characters that are not inside a double-quoted string.
///
/// A backslash inside quotes escapes the following character. If the quotes
/// are unbalanced the input is malformed, so every `;` is treated as a
/// separator instead of swallowing the rest of the header into one segment.
fn split_outside_quotes(input: &str) -> Vec<&str> {
    let mut segments = Vec::new();
    let mut segment_start = 0;
    let mut in_quotes = false;
    let mut escaped = false;

    for (idx, ch) in input.char_indices() {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' if in_quotes => escaped = true,
            '"' => in_quotes = !in_quotes,
            ';' if !in_quotes => {
                segments.push(&input[segment_start..idx]);
                segment_start = idx + 1; // ';' is one byte wide
            }
            _ => {}
        }
    }

    if in_quotes {
        return input.split(';').collect();
    }
    segments.push(&input[segment_start..]);
    segments
}

/// Normalise a `Content-Transfer-Encoding` header value.
///
/// The value is trimmed and upper-cased. When the header is missing the
/// result is `"7BIT"`.
fn parse_encoding(header: Option<&str>) -> String {
    match header {
        Some(raw) => raw.trim().to_uppercase(),
        None => String::from("7BIT"),
    }
}

/// Find the value of header `name` in the header section of `data`.
///
/// * The search stops at the first blank line, so the body is never scanned.
/// * Header names are compared ASCII case-insensitively and must be followed
///   directly by `:`.
/// * Folded headers are supported: continuation lines (lines starting with a
///   space or tab) belong to the value. The returned slice then still
///   contains the embedded line breaks, so callers should trim the pieces
///   they extract from it.
/// * A header whose value is empty or not valid UTF-8 is skipped and the
///   search continues with later lines; unrelated non-UTF-8 header lines do
///   not affect the lookup.
///
/// Returns `None` if no matching header with a non-empty value exists.
fn header_value<'a>(data: &'a [u8], name: &str) -> Option<&'a str> {
    let name = name.as_bytes();
    let mut line_start = 0;

    while line_start < data.len() {
        let (line_end, next_line) = line_bounds(data, line_start);
        if line_end == line_start {
            // Blank line: end of the header section.
            break;
        }

        let line = &data[line_start..line_end];
        let name_matches = line.len() > name.len()
            && line[name.len()] == b':'
            && line[..name.len()].eq_ignore_ascii_case(name);
        if !name_matches {
            line_start = next_line;
            continue;
        }

        // Extend the value over any folded continuation lines.
        let value_start = line_start + name.len() + 1;
        let mut value_end = line_end;
        let mut cursor = next_line;
        while cursor < data.len() && (data[cursor] == b' ' || data[cursor] == b'\t') {
            let (continuation_end, after) = line_bounds(data, cursor);
            value_end = continuation_end;
            cursor = after;
        }

        if let Ok(text) = std::str::from_utf8(&data[value_start..value_end]) {
            let value = text.trim();
            if !value.is_empty() {
                return Some(value);
            }
        }
        line_start = cursor;
    }
    None
}

/// For the line beginning at `start`, return `(content_end, next_line_start)`.
///
/// `content_end` excludes the line terminator (`\n` or `\r\n`);
/// `next_line_start` is `data.len()` when the line is the last one.
fn line_bounds(data: &[u8], start: usize) -> (usize, usize) {
    let newline = data[start..]
        .iter()
        .position(|&b| b == b'\n')
        .map(|i| start + i);
    let raw_end = newline.unwrap_or(data.len());
    let next = newline.map_or(data.len(), |i| i + 1);
    let end = if raw_end > start && data[raw_end - 1] == b'\r' {
        raw_end - 1
    } else {
        raw_end
    };
    (end, next)
}

/// Find the offset where the body starts (just past the blank line that ends
/// the headers).
///
/// A blank line is an empty line terminated by `\r\n` or by a bare `\n`. This
/// also covers data that *begins* with a blank line, i.e. a MIME part without
/// any headers, whose body starts right after that first line break.
///
/// Returns `data.len()` when there is no blank line (headers only, no body).
fn find_body_offset(data: &[u8]) -> usize {
    let mut line_start = 0;
    while line_start < data.len() {
        let line_end = match data[line_start..].iter().position(|&b| b == b'\n') {
            Some(rel) => line_start + rel,
            None => break,
        };
        let line = &data[line_start..line_end];
        if line.is_empty() || line == b"\r" {
            return line_end + 1;
        }
        line_start = line_end + 1;
    }
    data.len()
}

/// Count the line-feed (`\n`) bytes in `data`.
///
/// A trailing fragment without a terminating `\n` is not counted.
fn count_lines(data: &[u8]) -> usize {
    data.iter()
        .fold(0, |total, &byte| total + usize::from(byte == b'\n'))
}

/// Find the parts of a multipart body.
///
/// Returns one `(start, end)` pair of byte offsets into `data` per part:
/// `start` is the first byte after the delimiter line that opens the part and
/// `end` is the first byte of the delimiter line that closes it (so the line
/// break preceding that delimiter is included in the part).
///
/// * A delimiter line is exactly `--boundary`, and the close delimiter is
///   exactly `--boundary--`, each ignoring surrounding ASCII whitespace. A
///   line that merely *starts with* the delimiter (e.g. the boundary of a
///   nested multipart that shares a prefix) is not a delimiter.
/// * Scanning stops at the close delimiter; anything after it is epilogue.
/// * Text before the first delimiter (preamble) is ignored, and a final part
///   that is never closed by a delimiter line is not reported.
///
/// All offsets are computed on the raw bytes, so they are valid for CRLF and
/// bare-LF line endings and for bodies that are not valid UTF-8.
fn find_boundary_positions(data: &[u8], boundary: &str) -> Vec<(usize, usize)> {
    let delimiter = format!("--{}", boundary);
    let delimiter = delimiter.as_bytes();

    let mut positions = Vec::new();
    let mut part_start: Option<usize> = None;
    let mut line_start = 0;

    while line_start < data.len() {
        // `next_line` is the offset just past this line's terminating '\n'.
        let next_line = data[line_start..]
            .iter()
            .position(|&b| b == b'\n')
            .map_or(data.len(), |rel| line_start + rel + 1);

        // Strip the line terminator and any padding around the delimiter.
        let mut line = &data[line_start..next_line];
        while let Some((first, rest)) = line.split_first() {
            if !first.is_ascii_whitespace() {
                break;
            }
            line = rest;
        }
        while let Some((last, rest)) = line.split_last() {
            if !last.is_ascii_whitespace() {
                break;
            }
            line = rest;
        }

        if let Some(tail) = line.strip_prefix(delimiter) {
            let is_close = tail == b"--";
            if tail.is_empty() || is_close {
                if let Some(start) = part_start {
                    positions.push((start, line_start));
                }
                if is_close {
                    break;
                }
                part_start = Some(next_line);
            }
        }

        line_start = next_line;
    }

    positions
}

/// Parse MIME structure from raw message data.
///
/// `data` is parsed as one entity: a header section followed by a body.
/// Multipart bodies are parsed recursively.
///
/// The offsets stored in the returned [`MimePart`] leaves are relative to the
/// start of `data`.
pub fn parse_mime(data: &[u8]) -> MimePart {
    // The top-level entity starts at offset 0 of the buffer.
    parse_mime_part(data, 0)
}

/// Parse one MIME entity (headers + body), descending into multipart children.
///
/// `data` is the raw entity and `base_offset` is the position of `data[0]`
/// within the top-level buffer; it is added to every offset stored in the
/// result so that offsets stay relative to that buffer.
///
/// * A missing `Content-Type` is treated as `text/plain`.
/// * A `multipart/*` entity without a usable `boundary` parameter is reported
///   as a `text/plain` leaf covering the whole body.
/// * Child ranges that are empty or exceed the body are skipped.
fn parse_mime_part(data: &[u8], base_offset: usize) -> MimePart {
    let raw_content_type = header_value(data, "Content-Type").unwrap_or("text/plain");
    let encoding = parse_encoding(header_value(data, "Content-Transfer-Encoding"));
    let (media_type, subtype, params) = parse_content_type(raw_content_type);

    let header_len = find_body_offset(data);
    let body = &data[header_len..];
    let absolute_body_offset = base_offset + header_len;

    // Leaves first: everything that is not a multipart container.
    if media_type != "multipart" {
        return if media_type == "text" {
            MimePart::Text {
                subtype,
                charset: params.get("charset").cloned(),
                encoding,
                size: body.len(),
                lines: count_lines(body),
                body_offset: absolute_body_offset,
                body_len: body.len(),
            }
        } else {
            MimePart::Other {
                media_type,
                subtype,
                encoding,
                size: body.len(),
                body_offset: absolute_body_offset,
                body_len: body.len(),
            }
        };
    }

    let boundary = params.get("boundary").cloned().unwrap_or_default();
    if boundary.is_empty() {
        // Malformed multipart: without a boundary the body cannot be split.
        return MimePart::Text {
            subtype: String::from("plain"),
            charset: params.get("charset").cloned(),
            encoding,
            size: body.len(),
            lines: count_lines(body),
            body_offset: absolute_body_offset,
            body_len: body.len(),
        };
    }

    let parts: Vec<MimePart> = find_boundary_positions(body, &boundary)
        .into_iter()
        .filter(|&(start, end)| start < end && end <= body.len())
        .map(|(start, end)| parse_mime_part(&body[start..end], absolute_body_offset + start))
        .collect();

    MimePart::Multipart {
        subtype,
        boundary,
        parts,
    }
}

/// Format a MIME structure as an IMAP BODYSTRUCTURE response.
///
/// * Text parts: `("TEXT" subtype ("CHARSET" charset) NIL NIL encoding size lines)`,
///   with `US-ASCII` as the charset when none was given.
/// * Other leaf parts: `(type subtype NIL NIL NIL encoding size)`.
/// * Multipart: the child structures concatenated, followed by the subtype.
///   A multipart without children is reported as an empty `TEXT/PLAIN` part.
///
/// Type, subtype and charset are upper-cased. Every string that originates
/// from message headers is emitted as an escaped IMAP quoted string, so a
/// crafted header cannot break out of the quotes.
pub fn format_bodystructure(part: &MimePart) -> String {
    match part {
        MimePart::Text {
            subtype,
            charset,
            encoding,
            size,
            lines,
            ..
        } => {
            let cs = charset.as_deref().unwrap_or("US-ASCII");
            format!(
                "(\"TEXT\" {} (\"CHARSET\" {}) NIL NIL {} {} {})",
                imap_quoted(&subtype.to_uppercase()),
                imap_quoted(&cs.to_uppercase()),
                imap_quoted(encoding),
                size,
                lines
            )
        }
        MimePart::Multipart { subtype, parts, .. } => {
            if parts.is_empty() {
                // Empty multipart, return as text
                return "(\"TEXT\" \"PLAIN\" (\"CHARSET\" \"US-ASCII\") NIL NIL \"7BIT\" 0 0)"
                    .to_string();
            }
            let parts_str: String = parts.iter().map(format_bodystructure).collect();
            format!("({} {})", parts_str, imap_quoted(&subtype.to_uppercase()))
        }
        MimePart::Other {
            media_type,
            subtype,
            encoding,
            size,
            ..
        } => {
            format!(
                "({} {} NIL NIL NIL {} {})",
                imap_quoted(&media_type.to_uppercase()),
                imap_quoted(&subtype.to_uppercase()),
                imap_quoted(encoding),
                size
            )
        }
    }
}

/// Render `value` as an IMAP quoted string.
///
/// `"` and `\` are backslash-escaped; CR, LF and NUL cannot appear in a
/// quoted string at all and are dropped.
fn imap_quoted(value: &str) -> String {
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for ch in value.chars() {
        match ch {
            '"' | '\\' => {
                quoted.push('\\');
                quoted.push(ch);
            }
            '\r' | '\n' | '\0' => {}
            _ => quoted.push(ch),
        }
    }
    quoted.push('"');
    quoted
}

/// Extract a specific section from MIME data.
/// Supports section numbers like "1", "2", "1.1", "2.1", "TEXT", "HEADER", "1.MIME", etc.
pub fn extract_section<'a>(data: &'a [u8], section: &str) -> Option<&'a [u8]> {
    let section = section.trim();

    if section.is_empty() {
        // Empty section means entire message
        return Some(data);
    }

    let section_upper = section.to_uppercase();

    // Handle special sections
    if section_upper == "HEADER" {
        return Some(extract_headers(data));
    }
    if section_upper == "TEXT" {
        return Some(extract_body(data));
    }

    // Handle HEADER.FIELDS and HEADER.FIELDS.NOT - simplified, just return all headers
    if section_upper.starts_with("HEADER.FIELDS") {
        return Some(extract_headers(data));
    }

    // Parse numeric section (e.g., "1", "2.1", "1.2.3")
    let parts: Vec<&str> = section.split('.').collect();

    // Check if last part is MIME or HEADER
    let (section_nums, suffix) = if let Some(last) = parts.last() {
        let upper = last.to_uppercase();
        if upper == "MIME" || upper == "HEADER" || upper == "TEXT" {
            (&parts[..parts.len() - 1], Some(upper))
        } else {
            (&parts[..], None)
        }
    } else {
        return None;
    };

    // Navigate to the requested part
    let mut current_data = data;
    let mime_structure = parse_mime(data);
    let mut current_part = &mime_structure;

    for part_num_str in section_nums {
        let part_num: usize = part_num_str.parse().ok()?;
        if part_num == 0 {
            return None;
        }

        match current_part {
            MimePart::Multipart { parts, .. } => {
                current_part = parts.get(part_num - 1)?;
            }
            MimePart::Text {
                body_offset,
                body_len,
                ..
            }
            | MimePart::Other {
                body_offset,
                body_len,
                ..
            } => {
                // For non-multipart, part 1 is the content itself
                if part_num == 1 && section_nums.len() == 1 {
                    let start = *body_offset;
                    let end = start + body_len;
                    if end <= data.len() {
                        current_data = &data[start..end];
                    }
                    break;
                }
                return None;
            }
        }
    }

    // Extract the appropriate part of the content
    match (current_part, suffix.as_deref()) {
        (
            MimePart::Text {
                body_offset,
                body_len,
                ..
            },
            None,
        )
        | (
            MimePart::Other {
                body_offset,
                body_len,
                ..
            },
            None,
        ) => {
            let start = *body_offset;
            let end = start + body_len;
            if end <= data.len() {
                Some(&data[start..end])
            } else {
                Some(&data[start..])
            }
        }
        (MimePart::Multipart { .. }, None) => {
            // Multipart without suffix returns the entire part
            Some(current_data)
        }
        (_, Some("HEADER")) | (_, Some("MIME")) => Some(extract_headers(current_data)),
        (_, Some("TEXT")) => Some(extract_body(current_data)),
        _ => None,
    }
}

/// Extract headers from message data (up to and including the blank line).
///
/// The blank line may be terminated by `\r\n` or by a bare `\n`. Data that
/// begins with a blank line (a part without headers) yields just that line
/// break. If there is no blank line at all, the whole input is returned.
fn extract_headers(data: &[u8]) -> &[u8] {
    let mut line_start = 0;
    while let Some(rel) = data[line_start..].iter().position(|&b| b == b'\n') {
        let line_end = line_start + rel;
        let line = &data[line_start..line_end];
        if line.is_empty() || line == b"\r" {
            return &data[..=line_end];
        }
        line_start = line_end + 1;
    }
    data
}

/// Extract body from message data (after the blank line).
///
/// The blank line may be terminated by `\r\n` or by a bare `\n`. Data that
/// begins with a blank line (a part without headers) yields everything after
/// that line break. If there is no blank line, the body is empty.
fn extract_body(data: &[u8]) -> &[u8] {
    let mut line_start = 0;
    while let Some(rel) = data[line_start..].iter().position(|&b| b == b'\n') {
        let line_end = line_start + rel;
        let line = &data[line_start..line_end];
        if line.is_empty() || line == b"\r" {
            return &data[line_end + 1..];
        }
        line_start = line_end + 1;
    }
    &[]
}

// Unit tests for header parsing, BODYSTRUCTURE formatting and the
// whole-message section specifiers.
#[cfg(test)]
mod tests {
    use super::*;

    // A bare type/subtype yields no parameters.
    #[test]
    fn test_parse_content_type_simple() {
        let (media, subtype, params) = parse_content_type("text/plain");
        assert_eq!(media, "text");
        assert_eq!(subtype, "plain");
        assert!(params.is_empty());
    }

    // Parameter values keep their original case.
    #[test]
    fn test_parse_content_type_with_charset() {
        let (media, subtype, params) = parse_content_type("text/html; charset=UTF-8");
        assert_eq!(media, "text");
        assert_eq!(subtype, "html");
        assert_eq!(params.get("charset"), Some(&"UTF-8".to_string()));
    }

    // Surrounding double quotes are removed from parameter values.
    #[test]
    fn test_parse_content_type_multipart() {
        let (media, subtype, params) =
            parse_content_type("multipart/mixed; boundary=\"boundary123\"");
        assert_eq!(media, "multipart");
        assert_eq!(subtype, "mixed");
        assert_eq!(params.get("boundary"), Some(&"boundary123".to_string()));
    }

    // A single-part text message parses into a `Text` leaf carrying its charset.
    #[test]
    fn test_parse_simple_text_message() {
        let msg = b"Content-Type: text/plain; charset=UTF-8\r\n\r\nHello, World!\r\n";
        let part = parse_mime(msg);
        match part {
            MimePart::Text {
                subtype, charset, ..
            } => {
                assert_eq!(subtype, "plain");
                assert_eq!(charset, Some("UTF-8".to_string()));
            }
            _ => panic!("Expected Text part"),
        }
    }

    // Subtype and charset are upper-cased and quoted in the BODYSTRUCTURE output.
    #[test]
    fn test_format_bodystructure_text() {
        let part = MimePart::Text {
            subtype: "plain".to_string(),
            charset: Some("UTF-8".to_string()),
            encoding: "7BIT".to_string(),
            size: 100,
            lines: 5,
            body_offset: 0,
            body_len: 100,
        };
        let result = format_bodystructure(&part);
        assert!(result.contains("\"TEXT\""));
        assert!(result.contains("\"PLAIN\""));
        assert!(result.contains("\"UTF-8\""));
        assert!(result.contains("\"7BIT\""));
    }

    // HEADER returns the headers including the terminating blank line.
    #[test]
    fn test_extract_section_header() {
        let msg = b"Subject: Test\r\nFrom: test@test.com\r\n\r\nBody here";
        let headers = extract_section(msg, "HEADER").unwrap();
        assert!(headers.starts_with(b"Subject:"));
        assert!(headers.ends_with(b"\r\n\r\n"));
    }

    // TEXT returns everything after the blank line.
    #[test]
    fn test_extract_section_text() {
        let msg = b"Subject: Test\r\n\r\nBody here";
        let body = extract_section(msg, "TEXT").unwrap();
        assert_eq!(body, b"Body here");
    }
}