hippox 0.5.0

🦛 A reliable AI agent and skills orchestration runtime engine.
use anyhow::Result;
use quick_xml::{Reader, events::Event};
use serde_json::{Value, json};
use std::collections::HashMap;

use crate::executors::{
    ensure_dir, file_exists, read_file_content,
    types::{Skill, SkillParameter},
    validate_path, write_file_content,
};

#[derive(Debug)]
pub struct DocxReadSkill;

#[async_trait::async_trait]
impl Skill for DocxReadSkill {
    fn name(&self) -> &str {
        "docx_read"
    }

    fn description(&self) -> &str {
        "Read and extract text content from Word (.docx) files"
    }

    fn usage_hint(&self) -> &str {
        "Use this skill when the user wants to read Microsoft Word documents, extract text, or convert DOCX to plain text"
    }

    fn parameters(&self) -> Vec<SkillParameter> {
        vec![
            SkillParameter {
                name: "path".to_string(),
                param_type: "string".to_string(),
                description: "Path to the DOCX file".to_string(),
                required: true,
                default: None,
                example: Some(Value::String("document.docx".to_string())),
                enum_values: None,
            },
            SkillParameter {
                name: "include_tables".to_string(),
                param_type: "boolean".to_string(),
                description: "Include table data in output".to_string(),
                required: false,
                default: Some(Value::Bool(true)),
                example: Some(Value::Bool(true)),
                enum_values: None,
            },
        ]
    }

    fn example_call(&self) -> Value {
        json!({
            "action": "docx_read",
            "parameters": {
                "path": "document.docx"
            }
        })
    }

    fn example_output(&self) -> String {
        "Document content extracted from Word file...".to_string()
    }

    fn category(&self) -> &str {
        "document"
    }

    async fn execute(&self, parameters: &HashMap<String, Value>) -> Result<String> {
        let path = parameters
            .get("path")
            .and_then(|v| v.as_str())
            .ok_or_else(|| anyhow::anyhow!("Missing 'path' parameter"))?;
        let include_tables = parameters
            .get("include_tables")
            .and_then(|v| v.as_bool())
            .unwrap_or(true);

        let validated_path = validate_path(path, None)?;
        if !file_exists(&validated_path.to_string_lossy()) {
            anyhow::bail!("DOCX file not found: {}", path);
        }

        use quick_xml::Reader;
        use quick_xml::events::Event;
        use std::fs::File;
        use zip::ZipArchive;
        let file = File::open(&validated_path)?;
        let mut archive = ZipArchive::new(file)?;
        let mut document_content = None;
        for i in 0..archive.len() {
            let entry = archive.by_index(i)?;
            if entry.name() == "word/document.xml" {
                let mut content = String::new();
                let mut reader = std::io::BufReader::new(entry);
                std::io::Read::read_to_string(&mut reader, &mut content)?;
                document_content = Some(content);
                break;
            }
        }
        let content = document_content
            .ok_or_else(|| anyhow::anyhow!("No document.xml found in DOCX file"))?;
        let text = extract_text_from_docx_xml(&content, include_tables);
        Ok(text)
    }

    fn validate(&self, parameters: &HashMap<String, Value>) -> Result<()> {
        parameters
            .get("path")
            .and_then(|v| v.as_str())
            .ok_or_else(|| anyhow::anyhow!("Missing required parameter: path"))?;
        Ok(())
    }
}

#[derive(Debug)]
pub struct DocxInfoSkill;

#[async_trait::async_trait]
impl Skill for DocxInfoSkill {
    fn name(&self) -> &str {
        "docx_info"
    }

    fn description(&self) -> &str {
        "Get metadata and structure information about a Word document"
    }

    fn usage_hint(&self) -> &str {
        "Use this skill when the user wants to get document properties, word count, or file info"
    }

    fn parameters(&self) -> Vec<SkillParameter> {
        vec![SkillParameter {
            name: "path".to_string(),
            param_type: "string".to_string(),
            description: "Path to the DOCX file".to_string(),
            required: true,
            default: None,
            example: Some(Value::String("document.docx".to_string())),
            enum_values: None,
        }]
    }

    fn example_call(&self) -> Value {
        json!({
            "action": "docx_info",
            "parameters": {
                "path": "document.docx"
            }
        })
    }

    fn example_output(&self) -> String {
        "Word count: 1500\nPages: 5\nFile size: 120 KB".to_string()
    }

    fn category(&self) -> &str {
        "document"
    }

    async fn execute(&self, parameters: &HashMap<String, Value>) -> Result<String> {
        let path = parameters
            .get("path")
            .and_then(|v| v.as_str())
            .ok_or_else(|| anyhow::anyhow!("Missing 'path' parameter"))?;
        let validated_path = validate_path(path, None)?;
        if !file_exists(&validated_path.to_string_lossy()) {
            anyhow::bail!("DOCX file not found: {}", path);
        }
        use std::fs::File;
        use zip::ZipArchive;
        let file = File::open(&validated_path)?;
        let mut archive = ZipArchive::new(file)?;
        let metadata = std::fs::metadata(&validated_path)?;
        let file_size = metadata.len();
        let mut document_content = None;
        for i in 0..archive.len() {
            let entry = archive.by_index(i)?;
            if entry.name() == "word/document.xml" {
                let mut content = String::new();
                let mut reader = std::io::BufReader::new(entry);
                std::io::Read::read_to_string(&mut reader, &mut content)?;
                document_content = Some(content);
                break;
            }
        }
        let mut output = String::new();
        output.push_str(&format!("File: {}\n", path));
        output.push_str(&format!("File size: {:.2} KB\n", file_size as f64 / 1024.0));
        if let Some(content) = document_content {
            let text = extract_text_from_docx_xml(&content, false);
            let word_count = text.split_whitespace().count();
            let char_count = text.chars().count();
            let line_count = text.lines().count();
            output.push_str(&format!("Word count: {}\n", word_count));
            output.push_str(&format!("Character count: {}\n", char_count));
            output.push_str(&format!("Line count: {}\n", line_count));
        } else {
            output.push_str("Unable to extract document content\n");
        }
        Ok(output)
    }

    fn validate(&self, parameters: &HashMap<String, Value>) -> Result<()> {
        parameters
            .get("path")
            .and_then(|v| v.as_str())
            .ok_or_else(|| anyhow::anyhow!("Missing required parameter: path"))?;
        Ok(())
    }
}

fn extract_text_from_docx_xml(xml: &str, include_tables: bool) -> String {
    let mut reader = Reader::from_str(xml);
    reader.config_mut().trim_text(true);
    let mut text_parts = Vec::new();
    let mut in_text = false;
    let mut in_table = false;
    let mut table_content = Vec::new();
    let mut current_row = Vec::new();
    let mut buf = Vec::new();
    loop {
        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(ref e)) => match e.name().as_ref() {
                b"w:t" => in_text = true,
                b"w:tbl" => {
                    if include_tables {
                        in_table = true;
                    }
                }
                b"w:tr" if in_table => {
                    current_row.clear();
                }
                _ => {}
            },
            Ok(Event::Text(e)) => {
                if in_text {
                    if let Ok(text) = e.decode() {
                        let trimmed = text.trim();
                        if !trimmed.is_empty() && !in_table {
                            text_parts.push(trimmed.to_string());
                        } else if in_table && include_tables {
                            current_row.push(trimmed.to_string());
                        }
                    }
                }
            }
            Ok(Event::End(ref e)) => match e.name().as_ref() {
                b"w:t" => in_text = false,
                b"w:tr" if in_table => {
                    if include_tables && !current_row.is_empty() {
                        table_content.push(current_row.clone());
                    }
                    current_row.clear();
                }
                b"w:tbl" => {
                    if include_tables && !table_content.is_empty() {
                        text_parts.push(format_table(&table_content));
                        table_content.clear();
                    }
                    in_table = false;
                }
                b"w:p" => {
                    if !in_table {
                        text_parts.push("\n".to_string());
                    }
                }
                _ => {}
            },
            Ok(Event::Eof) => break,
            Err(e) => {
                eprintln!("Error parsing XML: {}", e);
                break;
            }
            _ => {}
        }
        buf.clear();
    }
    text_parts.join(" ")
}

/// Renders table rows as text framed by `[TABLE]` / `[/TABLE]` marker lines.
///
/// Each row becomes one line that starts with `| ` and appends `<cell> | ` for
/// every cell. An empty table yields an empty string.
fn format_table(table: &[Vec<String>]) -> String {
    match table {
        [] => String::new(),
        rows => {
            let mut rendered = String::from("\n[TABLE]\n");
            for cells in rows {
                let mut line = String::from("| ");
                for value in cells.iter() {
                    line.push_str(value);
                    line.push_str(" | ");
                }
                line.push('\n');
                rendered.push_str(&line);
            }
            rendered.push_str("[/TABLE]\n");
            rendered
        }
    }
}