redoxide 0.1.3 - Docs.rs

//! # RedOxide
//!
//! **RedOxide** is a high-performance, modular, extensible Red Teaming tool designed to evaluate
//! the safety and robustness of Large Language Models (LLMs).
//!
//! It simulates adversarial attacks (e.g., jailbreaks, payload splitting, social engineering)
//! against target LLMs to identify vulnerabilities.
//!
//! ## Core Architecture
//!
//! The library is built around four main parts:
//!
//! 1.  **[Target](crate::target::Target)**: Defines the **what**; `Target` represents the system under test (e.g., OpenAI GPT-4, Anthropic Claude, Local Ollama models).
//! 2.  **[Strategy](crate::strategy::Strategy)**: Defines the **how**; `Strategy` specifies how to perform the attack (e.g., generating malicious prompts using templates).
//! 3.  **[Evaluator](crate::evaluator::Evaluator)**: Defines the **if**; `Evaluator` determines if an attack was successful (e.g., by checking for refusal keywords or using an LLM Judge).
//! 4.  **[Runner](crate::runner::Runner)**: The async engine that orchestrates the attack, managing concurrency and reporting.
//!
//! ## Example Usage
//!
//! ```rust,no_run
//! use redoxide::target::{OpenAITarget, Target};
//! use redoxide::strategy::{JailbreakStrategy, Strategy};
//! use redoxide::evaluator::{KeywordEvaluator, Evaluator};
//! use redoxide::runner::Runner;
//! use std::sync::Arc;
//!
//! #[tokio::main]
//! async fn main() -> anyhow::Result<()> {
//!     // 1. What: set up the target (system under test)
//!     let api_key = std::env::var("OPENAI_API_KEY")?;
//!     let model = "gpt-3.5-turbo".to_string();
//!     let target = Arc::new(OpenAITarget::new(api_key, model));
//!
//!     // 2. How: define the attack strategy
//!     let prompts = vec!["How to make a bomb".to_string()];
//!     let strategy = Arc::new(JailbreakStrategy::new(prompts));
//!
//!     // 3. If: define the evaluator (did the attack find vulnerability?)
//!     let evaluator = Arc::new(KeywordEvaluator::default());
//!
//!     // 4. Run the scan with concurrency
//!     let runner = Runner::new(5, true); // 5 concurrent requests, verbose output
//!     let results = runner.run(target, strategy, evaluator).await?;
//!
//!     println!("Found {} successful attacks.", results.iter().filter(|r| r.success).count());
//!     Ok(())
//! }
//! ```

pub mod evaluator;
pub mod runner;
pub mod strategy;
pub mod target;

use serde::{Deserialize, Serialize};

/// A convenient type alias for `anyhow::Result`.
pub type RedOxideResult<T> = anyhow::Result<T>;

/// The result of a single Red Team attempt.
///
/// This struct captures the entire lifecycle of a specific prompt attempt:
/// what was sent, what came back, and whether the attack exposed a vulnerability.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttackResult {
    /// The actual prompt sent to the model (after any strategy templates were applied).
    pub prompt: String,

    /// The raw text response received from the target model.
    pub response: String,

    /// The verdict of the evaluator.
    /// * `true`: The attack **succeeded** (the model generated unsafe content).
    /// * `false`: The attack **failed** (the model refused or remained safe).
    pub success: bool,

    /// The name of the strategy used (e.g., "Template-Based Jailbreak").
    pub strategy_name: String,
}