#![allow(dead_code, unused_imports, unused_variables)]
pub mod config;
pub mod fixtures;
pub mod levels;
pub mod report;
pub mod runner;
pub mod scoring;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
/// Difficulty tier assigned to a benchmark level.
///
/// The derived `PartialOrd`/`Ord` follow declaration order, so variants
/// compare from `Easy` (lowest) up to `Mega` (highest). Do not reorder the
/// variants without also revisiting every comparison that relies on this.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Serialize, Deserialize)]
pub enum Difficulty {
    /// Lowest tier.
    Easy,
    /// Second tier.
    Medium,
    /// Third tier.
    Hard,
    /// Fourth tier; displayed as `"Very Hard"`.
    VeryHard,
    /// Fifth tier.
    Extreme,
    /// Highest tier.
    Mega,
}
/// User-facing label for a [`Difficulty`].
///
/// Every variant prints its own name, except `VeryHard`, which prints as the
/// two-word label `"Very Hard"`.
///
/// The label is emitted through [`std::fmt::Formatter::pad`], so format
/// specifiers such as `{:<10}` or `{:>12}` pad and align the label the same
/// way they would for a plain `&str`. Plain `{}` output is unaffected.
impl std::fmt::Display for Difficulty {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Easy => "Easy",
            Self::Medium => "Medium",
            Self::Hard => "Hard",
            Self::VeryHard => "Very Hard",
            Self::Extreme => "Extreme",
            Self::Mega => "Mega",
        };
        // `write!(f, "literal")` silently drops width/fill/alignment flags;
        // `pad` applies them, which matters for column-aligned output.
        f.pad(label)
    }
}
/// One benchmark case: an image, a prompt, and the answer expected for them.
///
/// The type is serde-serializable, so a scenario can round-trip through JSON
/// (the unit tests in this file exercise that).
#[derive(Debug, Clone)]
#[derive(Serialize, Deserialize)]
pub struct BenchScenario {
    /// Scenario identifier (for example `"test_01"`).
    pub id: String,

    /// Free-text description of the scenario.
    pub description: String,

    /// Path to the scenario's image file.
    // NOTE(review): whether this is resolved relative to a fixtures directory
    // or the working directory is not visible here -- confirm in the runner.
    pub image_path: PathBuf,

    /// Prompt text for the scenario (for example `"What do you see?"`).
    pub prompt: String,

    /// The answer the scenario expects; see [`ExpectedAnswer`].
    pub expected: ExpectedAnswer,
}
/// The shape of the answer a [`BenchScenario`] expects.
///
/// No serde representation attribute is set, so the default (externally
/// tagged) form is used and the variant name appears in the serialized
/// output.
// NOTE(review): how each variant is compared against a model response is
// decided by the evaluating code, which is not visible in this file.
#[derive(Debug, Clone)]
#[derive(Serialize, Deserialize)]
pub enum ExpectedAnswer {
    /// A list of keyword strings.
    Keywords(Vec<String>),

    /// An arbitrary JSON value describing expected fields.
    JsonFields(serde_json::Value),

    /// A list of `(key, value)` string pairs.
    KeyValuePairs(Vec<(String, String)>),

    /// A list of floating-point scores.
    VisualScores(Vec<f64>),
}
/// Interface implemented by each benchmark level.
///
/// Implementors must be `Send + Sync`.
// NOTE(review): `#[async_trait]` is applied although none of the methods
// declared here is `async`; presumably kept so implementors can use the same
// attribute -- confirm before removing.
#[async_trait]
pub trait VlmBenchLevel: Send + Sync {
    /// Returns the level's name.
    fn name(&self) -> &str;

    /// Returns the [`Difficulty`] tier of this level.
    fn difficulty(&self) -> Difficulty;

    /// Returns a description of the level.
    fn description(&self) -> &str;

    /// Returns the scenarios belonging to this level as an owned list.
    fn scenarios(&self) -> Vec<BenchScenario>;

    /// Produces a [`scoring::LevelScore`] for `response` with respect to
    /// `scenario`.
    fn evaluate(&self, scenario: &BenchScenario, response: &str) -> scoring::LevelScore;
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Each tier must compare strictly below the next one in declaration order.
    #[test]
    fn test_difficulty_ordering() {
        let ascending = [
            Difficulty::Easy,
            Difficulty::Medium,
            Difficulty::Hard,
            Difficulty::VeryHard,
            Difficulty::Extreme,
            Difficulty::Mega,
        ];
        for pair in ascending.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }

    /// Spot-checks the `Display` labels, including the two-word `VeryHard` one.
    #[test]
    fn test_difficulty_display() {
        let cases = [
            (Difficulty::Easy, "Easy"),
            (Difficulty::VeryHard, "Very Hard"),
            (Difficulty::Mega, "Mega"),
        ];
        for (level, label) in cases {
            assert_eq!(level.to_string(), label);
        }
    }

    /// A scenario serialized to JSON must deserialize with its fields intact.
    #[test]
    fn test_bench_scenario_serde_roundtrip() {
        let original = BenchScenario {
            id: String::from("test_01"),
            description: String::from("Test scenario"),
            image_path: PathBuf::from("test.png"),
            prompt: String::from("What do you see?"),
            expected: ExpectedAnswer::Keywords(vec![
                String::from("dashboard"),
                String::from("panel"),
            ]),
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: BenchScenario = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.id, "test_01");
        assert_eq!(decoded.prompt, "What do you see?");
    }

    /// The serialized form of each checked variant must contain its name.
    #[test]
    fn test_expected_answer_variants() {
        let cases = [
            (ExpectedAnswer::Keywords(vec!["a".into()]), "Keywords"),
            (
                ExpectedAnswer::JsonFields(serde_json::json!({"panel": "dashboard"})),
                "JsonFields",
            ),
            (
                ExpectedAnswer::VisualScores(vec![80.0, 70.0, 90.0]),
                "VisualScores",
            ),
        ];
        for (answer, tag) in &cases {
            let encoded = serde_json::to_string(answer).unwrap();
            assert!(encoded.contains(*tag));
        }
    }
}