// cooklang-import 0.9.2
//
// A tool for importing recipes into Cooklang format
// Documentation
use crate::pipelines::RecipeComponents;
use reqwest::Client;
use serde_json::Value;
use std::env;
use std::error::Error;

/// System prompt sent as the `system` message of the chat-completion request.
///
/// It instructs the model to reply with a single JSON object whose keys
/// (`title`, `servings`, `prep_time`, `cook_time`, `total_time`,
/// `ingredients`, `instructions`, `error`) are the ones read back by
/// `TextExtractor::extract`. Changing a key name here requires the matching
/// change in the code that consumes the response.
const PROMPT: &str = r#"
You're an expert in extracting recipe information from messy texts (often OCR'd from images).
Sometimes the text is not a recipe - in that case specify that in the error field.

IMPORTANT: Only extract information that is EXPLICITLY present in the text. Do NOT invent, guess, or estimate any values. If a field is not mentioned in the text, use null.

Given the text, output only this JSON without any other characters:

{
  "title": "<RECIPE TITLE OR null IF NOT EXPLICITLY STATED>",
  "servings": "<SERVINGS OR null IF NOT EXPLICITLY STATED>",
  "prep_time": "<PREP TIME OR null IF NOT EXPLICITLY STATED>",
  "cook_time": "<COOK TIME OR null IF NOT EXPLICITLY STATED>",
  "total_time": "<TOTAL TIME OR null IF NOT EXPLICITLY STATED>",
  "ingredients": ["<LIST OF INGREDIENTS>"],
  "instructions": ["<LIST OF INSTRUCTIONS>"],
  "error": "<ERROR MESSAGE IF NO RECIPE, OTHERWISE null>"
}
"#;

/// Model identifier placed in the `model` field of the chat-completion request.
const MODEL: &str = "gpt-4o-mini";

/// Extracts recipe components from plain text by asking a chat model to
/// return them as JSON.
///
/// This is a field-less unit struct used purely as a namespace: its methods
/// are associated functions and take no `self`.
pub struct TextExtractor;

impl TextExtractor {
    /// Check if the TextExtractor is available (has required API key configured)
    pub fn is_available() -> bool {
        env::var("OPENAI_API_KEY").is_ok()
    }

    pub async fn extract(
        plain_text: &str,
        source: &str,
    ) -> Result<RecipeComponents, Box<dyn Error + Send + Sync>> {
        let json = fetch_json(plain_text.to_string()).await?;

        // Check for error (not a recipe)
        if let Some(error) = json["error"].as_str() {
            if !error.is_empty() {
                return Err(error.into());
            }
        }

        // Extract title (fallback to empty string)
        let name = json["title"].as_str().unwrap_or("").to_string();

        // Build metadata YAML from available fields
        let mut metadata_lines = vec![format!("source: {}", source)];
        for field in ["servings", "prep_time", "cook_time", "total_time"] {
            if let Some(val) = json[field].as_str() {
                if !val.is_empty() {
                    metadata_lines.push(format!("{}: {}", field, val));
                }
            }
        }
        let metadata = metadata_lines.join("\n");

        // Format ingredients as newline-separated list
        let ingredients = json["ingredients"]
            .as_array()
            .unwrap_or(&Vec::new())
            .iter()
            .filter_map(|i| i.as_str().map(String::from))
            .collect::<Vec<String>>()
            .join("\n");

        // Format instructions as space-separated (paragraph)
        let instructions = json["instructions"]
            .as_array()
            .unwrap_or(&Vec::new())
            .iter()
            .filter_map(|i| i.as_str().map(String::from))
            .collect::<Vec<String>>()
            .join(" ");

        // Combine ingredients and instructions
        let text = format!("{}\n\n{}", ingredients, instructions);

        Ok(RecipeComponents {
            text,
            metadata,
            name,
        })
    }
}

/// Sends `texts` to the chat-completions endpoint together with [`PROMPT`]
/// and returns the JSON object the model produced.
///
/// When `OPENAI_API_KEY` equals `"test_key"` no request is made and a fixed
/// mock recipe is returned, so tests can run offline.
///
/// # Errors
///
/// Returns an error when:
/// * `OPENAI_API_KEY` is not set,
/// * the HTTP request fails or the endpoint answers with a non-success status
///   (the API's own error message is included when the body provides one),
/// * the response has no message content, or
/// * the message content is not valid JSON.
async fn fetch_json(texts: String) -> Result<Value, Box<dyn Error + Send + Sync>> {
    let api_key = env::var("OPENAI_API_KEY")?;

    // For testing environment, return mock data
    if api_key == "test_key" {
        return Ok(serde_json::json!({
            "title": "Test Recipe",
            "servings": "4",
            "prep_time": "10 min",
            "cook_time": "20 min",
            "total_time": "30 min",
            "ingredients": ["pasta", "sauce"],
            "instructions": ["Cook pasta with sauce"],
            "error": null
        }));
    }

    let response = Client::new()
        .post("https://api.openai.com/v1/chat/completions")
        .header("Authorization", format!("Bearer {api_key}"))
        .json(&serde_json::json!({
            "model": MODEL,
            "messages": [
                { "role": "system", "content": PROMPT },
                { "role": "user", "content": texts }
            ]
        }))
        .send()
        .await?;

    // Read the status before consuming the body so that API failures
    // (bad key, rate limit, server error) are reported as such instead of
    // surfacing later as "Failed to get response content".
    let status = response.status();
    let body = response.text().await?;

    if !status.is_success() {
        // Prefer the API's structured error message; fall back to the raw body.
        let detail = serde_json::from_str::<Value>(&body)
            .ok()
            .and_then(|v| v["error"]["message"].as_str().map(String::from))
            .unwrap_or(body);
        return Err(format!("OpenAI API request failed ({status}): {detail}").into());
    }

    let payload: Value = serde_json::from_str(&body)?;

    let content = payload["choices"][0]["message"]["content"]
        .as_str()
        .ok_or("Failed to get response content")?;

    // Models sometimes wrap the JSON in a Markdown code fence despite the prompt.
    serde_json::from_str(strip_code_fence(content)).map_err(|e| e.into())
}

/// Removes a surrounding Markdown code fence (```` ``` ```` or ```` ```json ````)
/// from `content`, returning the trimmed inner text; unfenced input is
/// returned trimmed and otherwise unchanged.
fn strip_code_fence(content: &str) -> &str {
    let trimmed = content.trim();
    let rest = match trimmed.strip_prefix("```") {
        Some(rest) => rest,
        None => return trimmed,
    };

    // Drop the rest of the opening fence line only when it looks like a
    // language tag (or is empty), so payload starting right after the fence
    // is never discarded.
    let rest = match rest.split_once('\n') {
        Some((tag, inner)) if tag.trim().chars().all(|c| c.is_ascii_alphanumeric()) => inner,
        _ => rest,
    };

    rest.strip_suffix("```").unwrap_or(rest).trim()
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard};

    /// Guards `OPENAI_API_KEY`. The test harness runs tests on parallel
    /// threads and the environment is process-wide, so without this lock one
    /// test's `remove_var` can land between another test's `set_var` and the
    /// code that reads the variable, making the suite flaky.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Acquires the environment lock, recovering from poisoning so that one
    /// failed test does not cascade into failures of the others.
    fn lock_env() -> MutexGuard<'static, ()> {
        ENV_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    #[tokio::test]
    // The guard must stay alive across the await: `extract` reads the
    // environment variable inside the awaited call.
    #[allow(clippy::await_holding_lock)]
    async fn test_extract_returns_recipe_components() {
        let _env = lock_env();
        // "test_key" makes `fetch_json` return its built-in mock recipe.
        std::env::set_var("OPENAI_API_KEY", "test_key");

        let result = TextExtractor::extract("some recipe text", "test-source").await;

        assert!(result.is_ok());
        let components = result.unwrap();

        assert_eq!(components.name, "Test Recipe");
        assert!(components.metadata.contains("source: test-source"));
        assert!(components.metadata.contains("servings: 4"));
        assert!(components.metadata.contains("prep_time: 10 min"));
        assert!(components.metadata.contains("cook_time: 20 min"));
        assert!(components.metadata.contains("total_time: 30 min"));
        assert!(components.text.contains("pasta"));
        assert!(components.text.contains("sauce"));
        assert!(components.text.contains("Cook pasta with sauce"));
    }

    #[test]
    fn test_is_available_without_key() {
        let _env = lock_env();
        std::env::remove_var("OPENAI_API_KEY");
        assert!(!TextExtractor::is_available());
    }

    #[test]
    fn test_is_available_with_key() {
        let _env = lock_env();
        std::env::set_var("OPENAI_API_KEY", "test_key");
        assert!(TextExtractor::is_available());
    }
}