kumo 0.4.0

An async web crawling framework for Rust - Scrapy for Rust
Documentation
use crate::error::KumoError;
use rig::OneOrMany;
use rig::completion::{AssistantContent, ToolDefinition};
use serde_json::Value;

/// Values assembled by `build_extract_args` for a single extraction request.
pub(super) struct ExtractArgs {
    /// User prompt text, rendered from a prompt template and the HTML input.
    pub user_content: String,
    /// System prompt text (caller-supplied, or the module default).
    pub system: String,
    /// Tool definition whose `parameters` carry the caller's JSON schema.
    pub tool: ToolDefinition,
}

/// Assembles the prompt text and tool definition for an extraction request.
///
/// # Parameters
/// * `schema` - JSON value cloned into the tool definition's `parameters`.
/// * `html` - markup that is rendered into the user prompt.
/// * `system_prompt` - replaces `prompt::DEFAULT_SYSTEM_PROMPT` when `Some`.
/// * `prompt_template` - replaces `prompt::DEFAULT_USER_PROMPT` when `Some`.
/// * `strip_scripts` - when `true`, `html` is first passed through
///   `prompt::strip_scripts_and_styles`.
///
/// # Returns
/// An [`ExtractArgs`] holding the rendered user prompt, the system prompt and
/// a tool definition named `"extract"`.
pub(super) fn build_extract_args(
    schema: &Value,
    html: &str,
    system_prompt: Option<&str>,
    prompt_template: Option<&str>,
    strip_scripts: bool,
) -> ExtractArgs {
    // Only the stripping path needs an owned buffer; otherwise borrow the
    // caller's HTML directly instead of copying the whole document.
    let stripped;
    let html: &str = if strip_scripts {
        stripped = super::prompt::strip_scripts_and_styles(html);
        stripped.as_str()
    } else {
        html
    };

    let user_template = prompt_template.unwrap_or(super::prompt::DEFAULT_USER_PROMPT);
    let user_content = super::prompt::render_user_prompt(user_template, html);
    let system = system_prompt
        .unwrap_or(super::prompt::DEFAULT_SYSTEM_PROMPT)
        .to_string();

    let tool = ToolDefinition {
        name: "extract".to_string(),
        description: "Extract structured data from the provided HTML.".to_string(),
        parameters: schema.clone(),
    };

    ExtractArgs {
        user_content,
        system,
        tool,
    }
}

/// Returns the arguments of the first tool call in `choice` whose function
/// name equals `tool_name`.
///
/// # Errors
/// Yields `KumoError::Llm` when no content item is a tool call with that name.
pub(super) fn extract_tool_input(
    choice: OneOrMany<AssistantContent>,
    tool_name: &str,
) -> Result<Value, KumoError> {
    choice
        .into_iter()
        .find_map(|item| match item {
            // Take ownership of the arguments of the first matching call.
            AssistantContent::ToolCall(call) if call.function.name == tool_name => {
                Some(call.function.arguments)
            }
            _ => None,
        })
        .ok_or_else(|| {
            KumoError::Llm(format!(
                "no '{tool_name}' tool_use block in LLM response"
            ))
        })
}

/// Wraps any displayable value in `KumoError::Llm`, using its `Display`
/// rendering as the error message.
pub(super) fn llm_err(msg: impl std::fmt::Display) -> KumoError {
    let rendered = format!("{msg}");
    KumoError::Llm(rendered)
}