#![allow(dead_code)]
use cqs::parser::Language;
use std::path::PathBuf;
/// A single evaluation query together with its ground-truth annotations.
///
/// Serializable and deserializable through serde. Fields carrying
/// `#[serde(default)]` may be absent from the input, in which case they
/// deserialize to an empty `Vec`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct EvalQuery {
/// Identifier of this query (presumably unique within a query set — confirm with the data producer).
pub id: String,
/// The query text.
pub query: String,
/// Category this query is filed under.
pub category: QueryCategory,
/// Optional tags attached to the query; empty when omitted from the input.
#[serde(default)]
pub tags: Vec<QueryTag>,
/// Optional language name, kept as a free-form string here.
pub language: Option<String>,
/// The main ground-truth answer for the query.
pub primary_answer: GroundTruth,
/// Additional ground-truth entries listed as acceptable; empty when omitted from the input.
#[serde(default)]
pub acceptable_answers: Vec<GroundTruth>,
/// Ground-truth entries listed as negative examples; empty when omitted from the input.
#[serde(default)]
pub negative_examples: Vec<GroundTruth>,
/// Which data split (train / held-out) this query belongs to.
pub split: EvalSplit,
}
/// Location of one ground-truth item referenced by an [`EvalQuery`].
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct GroundTruth {
/// Name of the referenced item (presumably a symbol name — confirm against the dataset).
pub name: String,
/// File the item lives in, stored as a plain string.
pub file: String,
/// Optional starting line; `None` when omitted from the input.
#[serde(default)]
pub line_start: Option<u32>,
}
/// Category assigned to an evaluation query.
///
/// Serde uses snake_case variant names (e.g. `identifier_lookup`). Note that
/// the `Display` implementation prints different, shorter labels
/// (e.g. `identifier`), so the two textual forms are not interchangeable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum QueryCategory {
// Serialized as `identifier_lookup`; displayed as `identifier`.
IdentifierLookup,
// Serialized as `behavioral_search`; displayed as `behavioral`.
BehavioralSearch,
// Serialized as `conceptual_search`; displayed as `conceptual`.
ConceptualSearch,
// Serialized and displayed as `type_filtered`.
TypeFiltered,
// Serialized as `cross_language`; displayed as `cross_lang`.
CrossLanguage,
// Serialized as `structural_search`; displayed as `structural`.
StructuralSearch,
// Serialized and displayed as `negation`.
Negation,
// Serialized and displayed as `multi_step`.
MultiStep,
}
impl std::fmt::Display for QueryCategory {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::IdentifierLookup => write!(f, "identifier"),
Self::BehavioralSearch => write!(f, "behavioral"),
Self::ConceptualSearch => write!(f, "conceptual"),
Self::TypeFiltered => write!(f, "type_filtered"),
Self::CrossLanguage => write!(f, "cross_lang"),
Self::StructuralSearch => write!(f, "structural"),
Self::Negation => write!(f, "negation"),
Self::MultiStep => write!(f, "multi_step"),
}
}
}
/// Optional tag that can be attached to an [`EvalQuery`] via its `tags` list.
///
/// Serde uses snake_case variant names (e.g. `cross_file`, `recent_add`).
/// NOTE(review): the precise meaning of each tag is not defined in this file;
/// the variant names are the only description available here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum QueryTag {
CrossFile,
RecentAdd,
NoiseTolerant,
SynonymHeavy,
Acronym,
CaseSensitive,
}
/// Data split an [`EvalQuery`] is assigned to.
///
/// Serde uses snake_case variant names: `train` and `held_out`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EvalSplit {
Train,
HeldOut,
}
/// A versioned collection of [`EvalQuery`] entries with descriptive metadata.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct EvalQuerySet {
/// Version label of the set, stored as a free-form string.
pub version: String,
/// Creation stamp, stored as a free-form string (format not enforced here).
pub created: String,
/// Human-readable description of the set.
pub description: String,
/// The queries contained in the set.
pub queries: Vec<EvalQuery>,
}
/// Outcome record for one query evaluated under one configuration in one run.
///
/// All fields are required when deserializing (none carry `#[serde(default)]`).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct EvalQueryResult {
/// Identifier of the evaluation run.
pub run_id: String,
/// Identifier of the evaluated query (presumably matches `EvalQuery::id` — confirm with the writer).
pub query_id: String,
/// Identifier of the configuration under test.
pub config_id: String,
/// Rank of the correct answer, if there is one.
/// NOTE(review): whether ranks are 0- or 1-based, and what `None` means exactly, is not visible here.
pub rank_of_correct: Option<u32>,
/// Reciprocal-rank value recorded for this query.
pub reciprocal_rank: f64,
/// Flag recorded for "correct answer at top 1".
pub top_1_correct: bool,
/// Flag recorded for "correct answer within top 5".
pub top_5_correct: bool,
/// Flag recorded for "an acceptable answer within top 5".
pub top_5_acceptable: bool,
/// Score of the first-ranked result.
pub top_1_score: f64,
/// Score of the second-ranked result.
pub top_2_score: f64,
/// Retrieval time; the field name indicates milliseconds.
pub retrieval_ms: f64,
/// Re-ranking time; the field name indicates milliseconds.
pub rerank_ms: f64,
}
/// A metric value accompanied by lower and upper confidence-interval bounds.
///
/// The `Display` implementation multiplies all three numbers by 100 and
/// prints them as percentages, so values are expected to be fractions when
/// displayed that way.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct MetricWithCI {
/// Point estimate of the metric.
pub value: f64,
/// Lower bound of the confidence interval.
pub ci_lower: f64,
/// Upper bound of the confidence interval.
pub ci_upper: f64,
}
impl std::fmt::Display for MetricWithCI {
    /// Renders the metric as `"<value>% [<lower>, <upper>]"`, with every
    /// number scaled by 100 and printed with one decimal place.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Stored values are fractions; scale them to percentages for display.
        let as_percent = |fraction: f64| fraction * 100.0;
        let mid = as_percent(self.value);
        let low = as_percent(self.ci_lower);
        let high = as_percent(self.ci_upper);
        f.write_fmt(format_args!("{:.1}% [{:.1}, {:.1}]", mid, low, high))
    }
}
/// Computes the mean of `values` together with a 95% percentile-bootstrap
/// confidence interval.
///
/// Resampling is reproducible: the generator seed is derived by hashing the
/// input length and the bit pattern of every value, so the same input yields
/// the same interval (for a given standard-library hasher implementation).
///
/// # Arguments
/// * `values` - Per-item metric values to average.
/// * `n_resamples` - Number of bootstrap resamples to draw.
///
/// # Returns
/// A [`MetricWithCI`] whose `value` is the sample mean and whose bounds are
/// the 2.5th and 97.5th percentiles of the resampled means.
///
/// * Empty `values` yields all zeros.
/// * `n_resamples == 0` yields an interval collapsed onto the point estimate
///   instead of panicking on an empty resample list.
pub fn bootstrap_ci(values: &[f64], n_resamples: usize) -> MetricWithCI {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let n = values.len();
    if n == 0 {
        return MetricWithCI {
            value: 0.0,
            ci_lower: 0.0,
            ci_upper: 0.0,
        };
    }

    let point = values.iter().sum::<f64>() / n as f64;

    // Without resamples there is no distribution to take percentiles from.
    if n_resamples == 0 {
        return MetricWithCI {
            value: point,
            ci_lower: point,
            ci_upper: point,
        };
    }

    // Seed derived from the data itself so repeated calls are reproducible.
    let mut seed: u64 = {
        let mut h = DefaultHasher::new();
        values.len().hash(&mut h);
        for v in values {
            v.to_bits().hash(&mut h);
        }
        h.finish()
    };

    let mut estimates: Vec<f64> = Vec::with_capacity(n_resamples);
    for _ in 0..n_resamples {
        let mut sum = 0.0;
        for _ in 0..n {
            // Linear congruential step; the high bits are used for the index
            // because the low bits of an LCG have short periods.
            seed = seed
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            let idx = (seed >> 33) as usize % n;
            sum += values[idx];
        }
        estimates.push(sum / n as f64);
    }
    estimates.sort_by(|a, b| a.total_cmp(b));

    // `estimates` is non-empty here because `n_resamples >= 1`.
    let last = estimates.len() - 1;
    let lo_idx = ((n_resamples as f64 * 0.025).ceil() as usize).saturating_sub(1);
    let hi_idx = ((n_resamples as f64 * 0.975).ceil() as usize).saturating_sub(1);
    MetricWithCI {
        value: point,
        ci_lower: estimates[lo_idx.min(last)],
        ci_upper: estimates[hi_idx.min(last)],
    }
}
/// Paired bootstrap comparison of two per-item metric vectors.
///
/// For every index the delta `b - a` is formed; the deltas are then resampled
/// with replacement `n_resamples` times using a fixed-seed generator, so the
/// result is reproducible across calls.
///
/// # Arguments
/// * `a_values` - Per-item values for the baseline.
/// * `b_values` - Per-item values for the candidate, aligned with `a_values`.
/// * `n_resamples` - Number of bootstrap resamples to draw.
///
/// # Returns
/// `(observed_delta, ci_lower, ci_upper, p_value)` where `observed_delta` is
/// the mean of `b - a`, the bounds are percentiles of the resampled mean
/// deltas, and `p_value` is twice the fraction of resampled deltas lying on
/// the opposite side of zero (zero included) from the observed delta, capped
/// at `1.0`.
///
/// * Empty input yields `(0.0, 0.0, 0.0, 1.0)`.
/// * `n_resamples == 0` yields an interval collapsed onto the observed delta
///   with `p_value == 1.0`, instead of panicking or producing a NaN.
///
/// # Panics
/// Panics if `a_values` and `b_values` differ in length.
pub fn paired_bootstrap(
    a_values: &[f64],
    b_values: &[f64],
    n_resamples: usize,
) -> (f64, f64, f64, f64) {
    assert_eq!(a_values.len(), b_values.len());
    let n = a_values.len();
    if n == 0 {
        return (0.0, 0.0, 0.0, 1.0);
    }

    let deltas: Vec<f64> = a_values.iter().zip(b_values).map(|(a, b)| b - a).collect();
    let observed_delta: f64 = deltas.iter().sum::<f64>() / n as f64;

    // No resamples means no evidence either way: degenerate interval, p = 1.
    if n_resamples == 0 {
        return (observed_delta, observed_delta, observed_delta, 1.0);
    }

    // Fixed seed keeps the comparison reproducible.
    let mut seed: u64 = 0x12345678_u64;
    let mut boot_deltas: Vec<f64> = Vec::with_capacity(n_resamples);
    for _ in 0..n_resamples {
        let mut sum = 0.0;
        for _ in 0..n {
            // Linear congruential step; the high bits select the index.
            seed = seed
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            let idx = (seed >> 33) as usize % n;
            sum += deltas[idx];
        }
        boot_deltas.push(sum / n as f64);
    }
    boot_deltas.sort_by(|a, b| a.total_cmp(b));

    // `boot_deltas` is non-empty here because `n_resamples >= 1`.
    let last = boot_deltas.len() - 1;
    // NOTE(review): the lower index truncates here, whereas `bootstrap_ci`
    // uses `ceil(..) - 1`; kept as-is so existing results do not shift.
    let lo_idx = (n_resamples as f64 * 0.025) as usize;
    let hi_idx = ((n_resamples as f64 * 0.975).ceil() as usize).saturating_sub(1);

    // Count resamples on the far side of zero relative to the observed delta.
    let opposite_side = if observed_delta >= 0.0 {
        boot_deltas.iter().filter(|&&d| d <= 0.0).count()
    } else {
        boot_deltas.iter().filter(|&&d| d >= 0.0).count()
    };
    let p_value = opposite_side as f64 / n_resamples as f64;

    (
        observed_delta,
        boot_deltas[lo_idx.min(last)],
        boot_deltas[hi_idx.min(last)],
        // Two-sided p-value, capped at 1.
        (p_value * 2.0).min(1.0),
    )
}
/// A compile-time evaluation case built entirely from `'static` data, as used
/// by the constant case tables in this file.
pub struct EvalCase {
/// The query text.
pub query: &'static str,
/// Name expected as the answer for `query`.
pub expected_name: &'static str,
/// Language this case applies to.
pub language: Language,
/// Alternative names listed alongside `expected_name`; often empty.
/// (presumably also counted as correct by the consumer — confirm at the call site)
pub also_accept: &'static [&'static str],
}
/// Owned, deserializable counterpart of [`EvalCase`], for cases loaded at runtime.
#[derive(serde::Deserialize)]
pub struct OwnedEvalCase {
/// The query text.
pub query: String,
/// Expected answer name; the input key may be either `expected` or `expected_name`.
#[serde(alias = "expected_name")]
pub expected: String,
/// Language name as a string; falls back to `default_language()` (`"rust"`) when omitted.
#[serde(default = "default_language")]
pub language: String,
/// Alternative names listed alongside `expected`; empty when omitted from the input.
#[serde(default)]
pub also_accept: Vec<String>,
}
/// Serde default for `OwnedEvalCase::language`: the string `"rust"`.
fn default_language() -> String {
    String::from("rust")
}
/// Reads the file at `path` and deserializes its JSON contents into a list of
/// [`OwnedEvalCase`].
///
/// # Panics
/// Panics with a message naming `path` if the file cannot be read or if its
/// contents do not deserialize.
pub fn load_eval_cases_from_json(path: &std::path::Path) -> Vec<OwnedEvalCase> {
    let contents = match std::fs::read_to_string(path) {
        Ok(text) => text,
        Err(err) => panic!("Failed to read eval file {}: {}", path.display(), err),
    };
    match serde_json::from_str::<Vec<OwnedEvalCase>>(&contents) {
        Ok(cases) => cases,
        Err(err) => panic!("Invalid JSON in {}: {}", path.display(), err),
    }
}
impl OwnedEvalCase {
    /// Parses the `language` string into a [`Language`].
    ///
    /// # Panics
    /// Panics, quoting the offending string, if it does not parse.
    pub fn parsed_language(&self) -> Language {
        match self.language.parse::<Language>() {
            Ok(lang) => lang,
            Err(_) => panic!("Unknown language '{}' in eval case", self.language),
        }
    }
}
/// Builds `<manifest dir>/tests/fixtures/<prefix><language>.<ext>`.
///
/// The manifest directory comes from the `CARGO_MANIFEST_DIR` environment
/// variable and falls back to `.` when it is unset. The variable is read with
/// `var_os`, so a directory that is not valid Unicode is used as-is rather
/// than being silently replaced by `.`.
///
/// `<language>` is the lower-cased `Display` form of `lang` and `<ext>` is
/// `lang.primary_extension()`.
fn fixture_path_with_prefix(prefix: &str, lang: Language) -> PathBuf {
    let manifest_dir = std::env::var_os("CARGO_MANIFEST_DIR")
        .map(PathBuf::from)
        .unwrap_or_else(|| PathBuf::from("."));
    let ext = lang.primary_extension();
    let file_name = format!("{}{}.{}", prefix, lang.to_string().to_lowercase(), ext);
    manifest_dir.join("tests").join("fixtures").join(file_name)
}

/// Path of the standard eval fixture for `lang`:
/// `<manifest dir>/tests/fixtures/eval_<language>.<ext>`.
pub fn fixture_path(lang: Language) -> PathBuf {
    fixture_path_with_prefix("eval_", lang)
}

/// Path of the "hard" eval fixture for `lang`:
/// `<manifest dir>/tests/fixtures/eval_hard_<language>.<ext>`.
pub fn hard_fixture_path(lang: Language) -> PathBuf {
    fixture_path_with_prefix("eval_hard_", lang)
}
/// Baseline evaluation cases: a query paired with the name expected as its
/// answer, ten cases each for Rust, Python, TypeScript, JavaScript and Go.
///
/// Every entry in this table has an empty `also_accept` list. Expected names
/// follow each language's naming style (snake_case, camelCase, PascalCase).
/// (presumably the names refer to items in the files returned by
/// `fixture_path` — confirm against the fixtures)
pub const EVAL_CASES: &[EvalCase] = &[
// --- Rust ---
EvalCase {
query: "retry with exponential backoff",
expected_name: "retry_with_backoff",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "validate email address format",
expected_name: "validate_email",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "parse JSON configuration file",
expected_name: "parse_json_config",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "compute SHA256 hash",
expected_name: "hash_sha256",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "format number as currency with commas",
expected_name: "format_currency",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "convert camelCase to snake_case",
expected_name: "camel_to_snake",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "truncate string with ellipsis",
expected_name: "truncate_string",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "check if string is valid UUID",
expected_name: "is_valid_uuid",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "sort array with quicksort algorithm",
expected_name: "quicksort",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "memoize function results",
expected_name: "get_or_compute",
language: Language::Rust,
also_accept: &[],
},
// --- Python ---
EvalCase {
query: "retry with exponential backoff",
expected_name: "retry_with_backoff",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "validate email address format",
expected_name: "validate_email",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "parse JSON config from file",
expected_name: "parse_json_config",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "compute SHA256 hash of bytes",
expected_name: "hash_sha256",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "format currency with dollar sign",
expected_name: "format_currency",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "convert camelCase to snake_case",
expected_name: "camel_to_snake",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "truncate string with ellipsis",
expected_name: "truncate_string",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "check UUID format validity",
expected_name: "is_valid_uuid",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "quicksort sorting algorithm",
expected_name: "quicksort",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "cache function results decorator",
expected_name: "memoize",
language: Language::Python,
also_accept: &[],
},
// --- TypeScript ---
EvalCase {
query: "retry operation with exponential backoff",
expected_name: "retryWithBackoff",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "validate email address",
expected_name: "validateEmail",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "parse JSON config string",
expected_name: "parseJsonConfig",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "SHA256 hash computation",
expected_name: "hashSha256",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "format money with commas",
expected_name: "formatCurrency",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "camelCase to snake_case conversion",
expected_name: "camelToSnake",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "truncate long string with dots",
expected_name: "truncateString",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "UUID format validation",
expected_name: "isValidUuid",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "quicksort implementation",
expected_name: "quicksort",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "memoization cache wrapper",
expected_name: "memoize",
language: Language::TypeScript,
also_accept: &[],
},
// --- JavaScript ---
EvalCase {
query: "retry with exponential backoff delay",
expected_name: "retryWithBackoff",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "email validation regex",
expected_name: "validateEmail",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "JSON configuration parser",
expected_name: "parseJsonConfig",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "SHA256 cryptographic hash",
expected_name: "hashSha256",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "currency formatter",
expected_name: "formatCurrency",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "convert camel case to snake case",
expected_name: "camelToSnake",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "string truncation with ellipsis",
expected_name: "truncateString",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "UUID validation check",
expected_name: "isValidUuid",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "quicksort divide and conquer",
expected_name: "quicksort",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "function result memoization",
expected_name: "memoize",
language: Language::JavaScript,
also_accept: &[],
},
// --- Go ---
EvalCase {
query: "retry with exponential backoff",
expected_name: "RetryWithBackoff",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "email address validation",
expected_name: "ValidateEmail",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "parse JSON config file",
expected_name: "ParseJsonConfig",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "compute SHA256 hash",
expected_name: "HashSha256",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "format currency with commas",
expected_name: "FormatCurrency",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "camelCase to snake_case",
expected_name: "CamelToSnake",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "truncate string ellipsis",
expected_name: "TruncateString",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "validate UUID format",
expected_name: "IsValidUuid",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "quicksort algorithm",
expected_name: "Quicksort",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "memoization get or compute",
expected_name: "GetOrCompute",
language: Language::Go,
also_accept: &[],
},
];
/// "Hard" evaluation cases: the same eleven query strings repeated for Rust,
/// Python, TypeScript, JavaScript, Go, Java and PHP, each with a
/// language-specific expected name.
///
/// The two circuit-breaker queries list alternatives in `also_accept`; a few
/// other entries do too. The PHP entries are compiled in only when the
/// `lang-php` feature is enabled.
/// (presumably the names refer to items in the files returned by
/// `hard_fixture_path` — confirm against the fixtures)
pub const HARD_EVAL_CASES: &[EvalCase] = &[
// --- Rust ---
EvalCase {
query: "stable sort preserving relative order of equal elements",
expected_name: "merge_sort",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "sort using binary max-heap data structure",
expected_name: "heap_sort",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "simple sort efficient for small nearly sorted arrays",
expected_name: "insertion_sort",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "non-comparison integer sort processing digits",
expected_name: "radix_sort",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "validate phone number with international country code",
expected_name: "validate_phone",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "check if URL has valid protocol and hostname",
expected_name: "validate_url",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "pad string to fixed width with fill character",
expected_name: "pad_string",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "count number of words in text",
expected_name: "count_words",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "extract numeric values from mixed text string",
expected_name: "extract_numbers",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "stop calling service after consecutive failures",
expected_name: "CircuitBreaker",
language: Language::Rust,
also_accept: &["should_allow", "record_failure"],
},
EvalCase {
query: "check whether circuit allows request through",
expected_name: "should_allow",
language: Language::Rust,
also_accept: &["CircuitBreaker"],
},
// --- Python ---
EvalCase {
query: "stable sort preserving relative order of equal elements",
expected_name: "merge_sort",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "sort using binary max-heap data structure",
expected_name: "heap_sort",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "simple sort efficient for small nearly sorted arrays",
expected_name: "insertion_sort",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "non-comparison integer sort processing digits",
expected_name: "radix_sort",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "validate phone number with international country code",
expected_name: "validate_phone",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "check if URL has valid protocol and hostname",
expected_name: "validate_url",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "pad string to fixed width with fill character",
expected_name: "pad_string",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "count number of words in text",
expected_name: "count_words",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "extract numeric values from mixed text string",
expected_name: "extract_numbers",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "stop calling service after consecutive failures",
expected_name: "CircuitBreaker",
language: Language::Python,
also_accept: &["should_allow", "record_failure"],
},
EvalCase {
query: "check whether circuit allows request through",
expected_name: "should_allow",
language: Language::Python,
also_accept: &["CircuitBreaker"],
},
// --- TypeScript ---
EvalCase {
query: "stable sort preserving relative order of equal elements",
expected_name: "mergeSort",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "sort using binary max-heap data structure",
expected_name: "heapSort",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "simple sort efficient for small nearly sorted arrays",
expected_name: "insertionSort",
language: Language::TypeScript,
also_accept: &["_insertionSortSmall"],
},
EvalCase {
query: "non-comparison integer sort processing digits",
expected_name: "radixSort",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "validate phone number with international country code",
expected_name: "validatePhone",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "check if URL has valid protocol and hostname",
expected_name: "validateUrl",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "pad string to fixed width with fill character",
expected_name: "padString",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "count number of words in text",
expected_name: "countWords",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "extract numeric values from mixed text string",
expected_name: "extractNumbers",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "stop calling service after consecutive failures",
expected_name: "CircuitBreaker",
language: Language::TypeScript,
also_accept: &["shouldAllow", "recordFailure"],
},
EvalCase {
query: "check whether circuit allows request through",
expected_name: "shouldAllow",
language: Language::TypeScript,
also_accept: &["CircuitBreaker"],
},
// --- JavaScript ---
EvalCase {
query: "stable sort preserving relative order of equal elements",
expected_name: "mergeSort",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "sort using binary max-heap data structure",
expected_name: "heapSort",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "simple sort efficient for small nearly sorted arrays",
expected_name: "insertionSort",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "non-comparison integer sort processing digits",
expected_name: "radixSort",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "validate phone number with international country code",
expected_name: "validatePhone",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "check if URL has valid protocol and hostname",
expected_name: "validateUrl",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "pad string to fixed width with fill character",
expected_name: "padString",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "count number of words in text",
expected_name: "countWords",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "extract numeric values from mixed text string",
expected_name: "extractNumbers",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "stop calling service after consecutive failures",
expected_name: "CircuitBreaker",
language: Language::JavaScript,
also_accept: &["shouldAllow", "recordFailure"],
},
EvalCase {
query: "check whether circuit allows request through",
expected_name: "shouldAllow",
language: Language::JavaScript,
also_accept: &["CircuitBreaker"],
},
// --- Go ---
EvalCase {
query: "stable sort preserving relative order of equal elements",
expected_name: "MergeSort",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "sort using binary max-heap data structure",
expected_name: "HeapSort",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "simple sort efficient for small nearly sorted arrays",
expected_name: "InsertionSort",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "non-comparison integer sort processing digits",
expected_name: "RadixSort",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "validate phone number with international country code",
expected_name: "ValidatePhone",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "check if URL has valid protocol and hostname",
expected_name: "ValidateUrl",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "pad string to fixed width with fill character",
expected_name: "PadString",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "count number of words in text",
expected_name: "CountWords",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "extract numeric values from mixed text string",
expected_name: "ExtractNumbers",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "stop calling service after consecutive failures",
expected_name: "CircuitBreakerGo",
language: Language::Go,
also_accept: &["ShouldAllow", "RecordFailure"],
},
EvalCase {
query: "check whether circuit allows request through",
expected_name: "ShouldAllow",
language: Language::Go,
also_accept: &["CircuitBreakerGo"],
},
// --- Java ---
EvalCase {
query: "stable sort preserving relative order of equal elements",
expected_name: "mergeSort",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "sort using binary max-heap data structure",
expected_name: "heapSort",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "simple sort efficient for small nearly sorted arrays",
expected_name: "insertionSort",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "non-comparison integer sort processing digits",
expected_name: "radixSort",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "validate phone number with international country code",
expected_name: "validatePhone",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "check if URL has valid protocol and hostname",
expected_name: "validateUrl",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "pad string to fixed width with fill character",
expected_name: "padString",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "count number of words in text",
expected_name: "countWords",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "extract numeric values from mixed text string",
expected_name: "extractNumbers",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "stop calling service after consecutive failures",
expected_name: "CircuitBreaker",
language: Language::Java,
also_accept: &["shouldAllow", "recordFailure"],
},
EvalCase {
query: "check whether circuit allows request through",
expected_name: "shouldAllow",
language: Language::Java,
also_accept: &["CircuitBreaker"],
},
// --- PHP (each entry is gated on the `lang-php` feature) ---
#[cfg(feature = "lang-php")]
EvalCase {
query: "stable sort preserving relative order of equal elements",
expected_name: "mergeSort",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "sort using binary max-heap data structure",
expected_name: "heapSort",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "simple sort efficient for small nearly sorted arrays",
expected_name: "insertionSort",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "non-comparison integer sort processing digits",
expected_name: "radixSort",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "validate phone number with international country code",
expected_name: "validatePhone",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "check if URL has valid protocol and hostname",
expected_name: "validateUrl",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "pad string to fixed width with fill character",
expected_name: "padString",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "count number of words in text",
expected_name: "countWords",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "extract numeric values from mixed text string",
expected_name: "extractNumbers",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "stop calling service after consecutive failures",
expected_name: "CircuitBreaker",
language: Language::Php,
also_accept: &["shouldAllow", "recordFailure"],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "check whether circuit allows request through",
expected_name: "shouldAllow",
language: Language::Php,
also_accept: &["CircuitBreaker"],
},
];
pub const HOLDOUT_EVAL_CASES: &[EvalCase] = &[
EvalCase {
query: "send data to remote API endpoint as JSON",
expected_name: "http_post_json",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "send data to remote API endpoint as JSON",
expected_name: "http_post_json",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "send data to remote API endpoint as JSON",
expected_name: "httpPostJson",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "send data to remote API endpoint as JSON",
expected_name: "httpPostJson",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "send data to remote API endpoint as JSON",
expected_name: "HttpPostJson",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "load text content from file on disk",
expected_name: "read_file_utf8",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "load text content from file on disk",
expected_name: "read_file_utf8",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "load text content from file on disk",
expected_name: "readFileUtf8",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "load text content from file on disk",
expected_name: "readFileUtf8",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "load text content from file on disk",
expected_name: "ReadFileUtf8",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "safely write data to file without corruption on crash",
expected_name: "write_file_atomic",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "safely write data to file without corruption on crash",
expected_name: "write_file_atomic",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "safely write data to file without corruption on crash",
expected_name: "writeFileAtomic",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "safely write data to file without corruption on crash",
expected_name: "writeFileAtomic",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "safely write data to file without corruption on crash",
expected_name: "WriteFileAtomic",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "compute arithmetic average of a list of numbers",
expected_name: "calculate_mean",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "compute arithmetic average of a list of numbers",
expected_name: "calculate_mean",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "compute arithmetic average of a list of numbers",
expected_name: "calculateMean",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "compute arithmetic average of a list of numbers",
expected_name: "calculateMean",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "compute arithmetic average of a list of numbers",
expected_name: "CalculateMean",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "find the largest element in an array",
expected_name: "find_maximum",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "find the largest element in an array",
expected_name: "find_maximum",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "find the largest element in an array",
expected_name: "findMaximum",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "find the largest element in an array",
expected_name: "findMaximum",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "find the largest element in an array",
expected_name: "FindMaximum",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "create a unique random identifier string",
expected_name: "generate_random_id",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "create a unique random identifier string",
expected_name: "generate_random_id",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "create a unique random identifier string",
expected_name: "generateRandomId",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "create a unique random identifier string",
expected_name: "generateRandomId",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "create a unique random identifier string",
expected_name: "GenerateRandomId",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "compress data using run-length encoding",
expected_name: "compress_rle",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "compress data using run-length encoding",
expected_name: "compress_rle",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "compress data using run-length encoding",
expected_name: "compressRle",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "compress data using run-length encoding",
expected_name: "compressRle",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "compress data using run-length encoding",
expected_name: "CompressRle",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "parse command-line flags and arguments into a map",
expected_name: "parse_cli_args",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "parse command-line flags and arguments into a map",
expected_name: "parse_cli_args",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "parse command-line flags and arguments into a map",
expected_name: "parseCliArgs",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "parse command-line flags and arguments into a map",
expected_name: "parseCliArgs",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "parse command-line flags and arguments into a map",
expected_name: "ParseCliArgs",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "delay function execution until input stops changing",
expected_name: "Debouncer",
language: Language::Rust,
also_accept: &["should_execute"],
},
EvalCase {
query: "delay function execution until input stops changing",
expected_name: "debounce",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "delay function execution until input stops changing",
expected_name: "debounce",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "delay function execution until input stops changing",
expected_name: "debounce",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "delay function execution until input stops changing",
expected_name: "NewDebouncer",
language: Language::Go,
also_accept: &["Debouncer", "ShouldExecute"],
},
EvalCase {
query: "recursively flatten nested lists into a single flat list",
expected_name: "flatten_nested_list",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "recursively flatten nested lists into a single flat list",
expected_name: "flattenNestedArray",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "recursively flatten nested lists into a single flat list",
expected_name: "flattenNestedArray",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "recursively flatten nested lists into a single flat list",
expected_name: "FlattenNestedSlice",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "recursively merge two nested dictionaries or config objects",
expected_name: "deep_merge_dicts",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "recursively merge two nested dictionaries or config objects",
expected_name: "deepMergeObjects",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "recursively merge two nested dictionaries or config objects",
expected_name: "deepMergeObjects",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "recursively merge two nested dictionaries or config objects",
expected_name: "DeepMergeMaps",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "sort by repeatedly swapping adjacent out-of-order elements",
expected_name: "bubble_sort",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "sort by repeatedly swapping adjacent out-of-order elements",
expected_name: "bubble_sort",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "sort by repeatedly swapping adjacent out-of-order elements",
expected_name: "bubbleSort",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "sort by repeatedly swapping adjacent out-of-order elements",
expected_name: "bubbleSort",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "sort by repeatedly swapping adjacent out-of-order elements",
expected_name: "BubbleSort",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "reverse the order of characters in a string",
expected_name: "reverse_string",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "reverse the order of characters in a string",
expected_name: "reverse_string",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "reverse the order of characters in a string",
expected_name: "reverseString",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "reverse the order of characters in a string",
expected_name: "reverseString",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "reverse the order of characters in a string",
expected_name: "ReverseString",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "check if string is a valid IPv4 address with four octets",
expected_name: "validate_ip_address",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "check if string is a valid IPv4 address with four octets",
expected_name: "validate_ip_address",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "check if string is a valid IPv4 address with four octets",
expected_name: "validateIpAddress",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "check if string is a valid IPv4 address with four octets",
expected_name: "validateIpAddress",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "check if string is a valid IPv4 address with four octets",
expected_name: "ValidateIpAddress",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "compute CRC32 checksum of byte data",
expected_name: "hash_crc32",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "compute CRC32 checksum of byte data",
expected_name: "hash_crc32",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "compute CRC32 checksum of byte data",
expected_name: "hashCrc32",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "compute CRC32 checksum of byte data",
expected_name: "hashCrc32",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "compute CRC32 checksum of byte data",
expected_name: "HashCrc32",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "throttle request rate using token bucket algorithm",
expected_name: "RateLimiter",
language: Language::Rust,
also_accept: &["allow"],
},
EvalCase {
query: "throttle request rate using token bucket algorithm",
expected_name: "RateLimiter",
language: Language::Python,
also_accept: &["allow"],
},
EvalCase {
query: "throttle request rate using token bucket algorithm",
expected_name: "RateLimiter",
language: Language::TypeScript,
also_accept: &["allow"],
},
EvalCase {
query: "throttle request rate using token bucket algorithm",
expected_name: "RateLimiter",
language: Language::JavaScript,
also_accept: &["allow"],
},
EvalCase {
query: "throttle request rate using token bucket algorithm",
expected_name: "RateLimiterGo",
language: Language::Go,
also_accept: &["NewRateLimiter", "Allow"],
},
EvalCase {
query: "mark a successful call to reset circuit breaker failure count",
expected_name: "record_success",
language: Language::Rust,
also_accept: &["CircuitBreaker"],
},
EvalCase {
query: "mark a successful call to reset circuit breaker failure count",
expected_name: "record_success",
language: Language::Python,
also_accept: &["CircuitBreaker"],
},
EvalCase {
query: "mark a successful call to reset circuit breaker failure count",
expected_name: "recordSuccess",
language: Language::TypeScript,
also_accept: &["CircuitBreaker"],
},
EvalCase {
query: "mark a successful call to reset circuit breaker failure count",
expected_name: "recordSuccess",
language: Language::JavaScript,
also_accept: &["CircuitBreaker"],
},
EvalCase {
query: "mark a successful call to reset circuit breaker failure count",
expected_name: "RecordSuccess",
language: Language::Go,
also_accept: &["CircuitBreakerGo"],
},
EvalCase {
query: "automatically retry failed operations with increasing delay between attempts",
expected_name: "retry_with_backoff",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "automatically retry failed operations with increasing delay between attempts",
expected_name: "retry_with_backoff",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "automatically retry failed operations with increasing delay between attempts",
expected_name: "retryWithBackoff",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "automatically retry failed operations with increasing delay between attempts",
expected_name: "retryWithBackoff",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "automatically retry failed operations with increasing delay between attempts",
expected_name: "RetryWithBackoff",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "check if a string looks like a properly formatted email with @ and domain",
expected_name: "validate_email",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "check if a string looks like a properly formatted email with @ and domain",
expected_name: "validate_email",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "check if a string looks like a properly formatted email with @ and domain",
expected_name: "validateEmail",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "check if a string looks like a properly formatted email with @ and domain",
expected_name: "validateEmail",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "check if a string looks like a properly formatted email with @ and domain",
expected_name: "ValidateEmail",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "divide and conquer sort that picks a pivot and partitions around it",
expected_name: "quicksort",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "divide and conquer sort that picks a pivot and partitions around it",
expected_name: "quicksort",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "divide and conquer sort that picks a pivot and partitions around it",
expected_name: "quicksort",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "divide and conquer sort that picks a pivot and partitions around it",
expected_name: "quicksort",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "divide and conquer sort that picks a pivot and partitions around it",
expected_name: "Quicksort",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "display a decimal number as money with dollar sign and commas",
expected_name: "format_currency",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "display a decimal number as money with dollar sign and commas",
expected_name: "format_currency",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "display a decimal number as money with dollar sign and commas",
expected_name: "formatCurrency",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "display a decimal number as money with dollar sign and commas",
expected_name: "formatCurrency",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "display a decimal number as money with dollar sign and commas",
expected_name: "FormatCurrency",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "transform PascalCase or camelCase identifiers to underscore_separated lowercase",
expected_name: "camel_to_snake",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "transform PascalCase or camelCase identifiers to underscore_separated lowercase",
expected_name: "camel_to_snake",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "transform PascalCase or camelCase identifiers to underscore_separated lowercase",
expected_name: "camelToSnake",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "transform PascalCase or camelCase identifiers to underscore_separated lowercase",
expected_name: "camelToSnake",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "transform PascalCase or camelCase identifiers to underscore_separated lowercase",
expected_name: "CamelToSnake",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "validate that a string matches the 8-4-4-4-12 hexadecimal UUID pattern",
expected_name: "is_valid_uuid",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "validate that a string matches the 8-4-4-4-12 hexadecimal UUID pattern",
expected_name: "is_valid_uuid",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "validate that a string matches the 8-4-4-4-12 hexadecimal UUID pattern",
expected_name: "isValidUuid",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "validate that a string matches the 8-4-4-4-12 hexadecimal UUID pattern",
expected_name: "isValidUuid",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "validate that a string matches the 8-4-4-4-12 hexadecimal UUID pattern",
expected_name: "IsValidUuid",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "generate a cryptographic digest of data using the SHA-256 algorithm",
expected_name: "hash_sha256",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "generate a cryptographic digest of data using the SHA-256 algorithm",
expected_name: "hash_sha256",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "generate a cryptographic digest of data using the SHA-256 algorithm",
expected_name: "hashSha256",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "generate a cryptographic digest of data using the SHA-256 algorithm",
expected_name: "hashSha256",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "generate a cryptographic digest of data using the SHA-256 algorithm",
expected_name: "HashSha256",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "shorten text to a character limit and append ellipsis if trimmed",
expected_name: "truncate_string",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "shorten text to a character limit and append ellipsis if trimmed",
expected_name: "truncate_string",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "shorten text to a character limit and append ellipsis if trimmed",
expected_name: "truncateString",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "shorten text to a character limit and append ellipsis if trimmed",
expected_name: "truncateString",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "shorten text to a character limit and append ellipsis if trimmed",
expected_name: "TruncateString",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "deserialize a JSON string into a typed configuration object",
expected_name: "parse_json_config",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "deserialize a JSON string into a typed configuration object",
expected_name: "parse_json_config",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "deserialize a JSON string into a typed configuration object",
expected_name: "parseJsonConfig",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "deserialize a JSON string into a typed configuration object",
expected_name: "parseJsonConfig",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "deserialize a JSON string into a typed configuration object",
expected_name: "ParseJsonConfig",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "prevent API abuse by limiting how many requests a client can make per second",
expected_name: "RateLimiter",
language: Language::Rust,
also_accept: &["allow"],
},
EvalCase {
query: "prevent API abuse by limiting how many requests a client can make per second",
expected_name: "RateLimiter",
language: Language::Python,
also_accept: &["allow"],
},
EvalCase {
query: "prevent API abuse by limiting how many requests a client can make per second",
expected_name: "RateLimiter",
language: Language::TypeScript,
also_accept: &["allow"],
},
EvalCase {
query: "prevent API abuse by limiting how many requests a client can make per second",
expected_name: "RateLimiter",
language: Language::JavaScript,
also_accept: &["allow"],
},
EvalCase {
query: "prevent API abuse by limiting how many requests a client can make per second",
expected_name: "RateLimiterGo",
language: Language::Go,
also_accept: &["NewRateLimiter", "Allow"],
},
EvalCase {
query: "cache the results of expensive function calls to avoid redundant computation",
expected_name: "get_or_compute",
language: Language::Rust,
also_accept: &["Memoizer"],
},
EvalCase {
query: "cache the results of expensive function calls to avoid redundant computation",
expected_name: "memoize",
language: Language::Python,
also_accept: &["Memoizer"],
},
EvalCase {
query: "cache the results of expensive function calls to avoid redundant computation",
expected_name: "memoize",
language: Language::TypeScript,
also_accept: &["Memoizer"],
},
EvalCase {
query: "cache the results of expensive function calls to avoid redundant computation",
expected_name: "memoize",
language: Language::JavaScript,
also_accept: &["Memoizer"],
},
EvalCase {
query: "cache the results of expensive function calls to avoid redundant computation",
expected_name: "GetOrCompute",
language: Language::Go,
also_accept: &["Memoizer", "NewMemoizer"],
},
EvalCase {
query: "read the entire contents of a text file as a UTF-8 encoded string",
expected_name: "read_file_utf8",
language: Language::Rust,
also_accept: &[],
},
EvalCase {
query: "read the entire contents of a text file as a UTF-8 encoded string",
expected_name: "read_file_utf8",
language: Language::Python,
also_accept: &[],
},
EvalCase {
query: "read the entire contents of a text file as a UTF-8 encoded string",
expected_name: "readFileUtf8",
language: Language::TypeScript,
also_accept: &[],
},
EvalCase {
query: "read the entire contents of a text file as a UTF-8 encoded string",
expected_name: "readFileUtf8",
language: Language::JavaScript,
also_accept: &[],
},
EvalCase {
query: "read the entire contents of a text file as a UTF-8 encoded string",
expected_name: "ReadFileUtf8",
language: Language::Go,
also_accept: &[],
},
EvalCase {
query: "send data to remote API endpoint as JSON",
expected_name: "httpPostJson",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "load text content from file on disk",
expected_name: "readFileUtf8",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "safely write data to file without corruption on crash",
expected_name: "writeFileAtomic",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "compute arithmetic average of a list of numbers",
expected_name: "calculateMean",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "find the largest element in an array",
expected_name: "findMaximum",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "create a unique random identifier string",
expected_name: "generateRandomId",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "compress data using run-length encoding",
expected_name: "compressRle",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "parse command-line flags and arguments into a map",
expected_name: "parseCliArgs",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "delay function execution until input stops changing",
expected_name: "Debouncer",
language: Language::Java,
also_accept: &["debounce"],
},
EvalCase {
query: "sort by repeatedly swapping adjacent out-of-order elements",
expected_name: "bubbleSort",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "reverse the order of characters in a string",
expected_name: "reverseString",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "check if string is a valid IPv4 address with four octets",
expected_name: "validateIpAddress",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "compute CRC32 checksum of byte data",
expected_name: "hashCrc32",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "throttle request rate using token bucket algorithm",
expected_name: "RateLimiter",
language: Language::Java,
also_accept: &["allow"],
},
EvalCase {
query: "mark a successful call to reset circuit breaker failure count",
expected_name: "recordSuccess",
language: Language::Java,
also_accept: &["CircuitBreaker"],
},
EvalCase {
query: "automatically retry failed operations with increasing delay between attempts",
expected_name: "retryWithBackoff",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "check if a string looks like a properly formatted email with @ and domain",
expected_name: "validateEmail",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "divide and conquer sort that picks a pivot and partitions around it",
expected_name: "quicksort",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "display a decimal number as money with dollar sign and commas",
expected_name: "formatCurrency",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "transform PascalCase or camelCase identifiers to underscore_separated lowercase",
expected_name: "camelToSnake",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "validate that a string matches the 8-4-4-4-12 hexadecimal UUID pattern",
expected_name: "isValidUuid",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "generate a cryptographic digest of data using the SHA-256 algorithm",
expected_name: "hashSha256",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "shorten text to a character limit and append ellipsis if trimmed",
expected_name: "truncateString",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "deserialize a JSON string into a typed configuration object",
expected_name: "parseJsonConfig",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "prevent API abuse by limiting how many requests a client can make per second",
expected_name: "RateLimiter",
language: Language::Java,
also_accept: &["allow"],
},
EvalCase {
query: "cache the results of expensive function calls to avoid redundant computation",
expected_name: "getOrCompute",
language: Language::Java,
also_accept: &["Memoizer"],
},
EvalCase {
query: "read the entire contents of a text file as a UTF-8 encoded string",
expected_name: "readFileUtf8",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "traverse graph level by level visiting nearest nodes first using a queue",
expected_name: "bfsTraversal",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "traverse graph exploring as deep as possible before backtracking using a stack",
expected_name: "dfsTraversal",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "cache that evicts the least recently accessed entry when full",
expected_name: "LruCache",
language: Language::Java,
also_accept: &["get", "put"],
},
EvalCase {
query: "cache that automatically expires entries after a time-to-live duration",
expected_name: "TtlCache",
language: Language::Java,
also_accept: &["get", "put", "evictExpired"],
},
EvalCase {
query: "convert list of records into comma-separated values format with header",
expected_name: "serializeToCsv",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "convert key-value data into XML document with named elements",
expected_name: "serializeToXml",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "match filename against wildcard pattern with asterisk and question mark",
expected_name: "globMatch",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "match text against regular expression and extract captured groups",
expected_name: "regexMatchGroups",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "try primary operation and fall back to alternative on repeated failure",
expected_name: "retryWithFallback",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "recursively flatten nested lists into a single flat list",
expected_name: "flattenNestedList",
language: Language::Java,
also_accept: &[],
},
EvalCase {
query: "recursively merge two nested dictionaries or config objects",
expected_name: "deepMergeMaps",
language: Language::Java,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "send data to remote API endpoint as JSON",
expected_name: "httpPostJson",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "load text content from file on disk",
expected_name: "readFileUtf8",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "safely write data to file without corruption on crash",
expected_name: "writeFileAtomic",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "compute arithmetic average of a list of numbers",
expected_name: "calculateMean",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "find the largest element in an array",
expected_name: "findMaximum",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "create a unique random identifier string",
expected_name: "generateRandomId",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "compress data using run-length encoding",
expected_name: "compressRle",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "parse command-line flags and arguments into a map",
expected_name: "parseCliArgs",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "delay function execution until input stops changing",
expected_name: "debounce",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "sort by repeatedly swapping adjacent out-of-order elements",
expected_name: "bubbleSort",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "reverse the order of characters in a string",
expected_name: "reverseString",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "check if string is a valid IPv4 address with four octets",
expected_name: "validateIpAddress",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "compute CRC32 checksum of string data",
expected_name: "hashCrc32",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "throttle request rate using token bucket algorithm",
expected_name: "RateLimiter",
language: Language::Php,
also_accept: &["allow"],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "mark a successful call to reset circuit breaker failure count",
expected_name: "recordSuccess",
language: Language::Php,
also_accept: &["CircuitBreaker"],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "automatically retry failed operations with increasing delay between attempts",
expected_name: "retryWithBackoff",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "check if a string looks like a properly formatted email with @ and domain",
expected_name: "validateEmail",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "divide and conquer sort that picks a pivot and partitions around it",
expected_name: "quicksort",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "display a decimal number as money with dollar sign and commas",
expected_name: "formatCurrency",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "transform PascalCase or camelCase identifiers to underscore_separated lowercase",
expected_name: "camelToSnake",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "validate that a string matches the 8-4-4-4-12 hexadecimal UUID pattern",
expected_name: "isValidUuid",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "generate a cryptographic digest of data using the SHA-256 algorithm",
expected_name: "hashSha256",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "shorten text to a character limit and append ellipsis if trimmed",
expected_name: "truncateString",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "deserialize a JSON string into a typed configuration object",
expected_name: "parseJsonConfig",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "prevent API abuse by limiting how many requests a client can make per second",
expected_name: "RateLimiter",
language: Language::Php,
also_accept: &["allow"],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "cache the results of expensive function calls to avoid redundant computation",
expected_name: "memoize",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "read the entire contents of a text file as a UTF-8 encoded string",
expected_name: "readFileUtf8",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "traverse graph level by level visiting nearest nodes first using a queue",
expected_name: "bfsTraversal",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "traverse graph exploring as deep as possible before backtracking using a stack",
expected_name: "dfsTraversal",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "cache that evicts the least recently accessed entry when full",
expected_name: "LruCache",
language: Language::Php,
also_accept: &["get", "put"],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "cache that automatically expires entries after a time-to-live duration",
expected_name: "TtlCache",
language: Language::Php,
also_accept: &["get", "put", "evictExpired"],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "convert list of records into comma-separated values format with header",
expected_name: "serializeToCsv",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "convert key-value data into XML document with named elements",
expected_name: "serializeToXml",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "match filename against wildcard pattern with asterisk and question mark",
expected_name: "globMatch",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "match text against regular expression and extract captured groups",
expected_name: "regexMatchGroups",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "try primary operation and fall back to alternative on repeated failure",
expected_name: "retryWithFallback",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "recursively flatten nested arrays into a single flat array",
expected_name: "flattenNestedArray",
language: Language::Php,
also_accept: &[],
},
#[cfg(feature = "lang-php")]
EvalCase {
query: "recursively merge two nested arrays with deep merge strategy",
expected_name: "deepMergeArrays",
language: Language::Php,
also_accept: &[],
},
];
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// Builds the path `<root>/tests/fixtures`, where `<root>` is the value of
    /// the `CARGO_MANIFEST_DIR` environment variable read at runtime, or `.`
    /// when that variable cannot be read.
    fn fixture_dir() -> PathBuf {
        let root: String = match std::env::var("CARGO_MANIFEST_DIR") {
            Ok(dir) => dir,
            Err(_) => ".".into(),
        };
        let mut fixtures = PathBuf::from(root);
        fixtures.push("tests");
        fixtures.push("fixtures");
        fixtures
    }

    /// Loads the `eval_sample.json` fixture and checks the number of cases
    /// plus selected fields of the first, second and third entries.
    #[test]
    fn test_load_eval_cases_from_json() {
        let sample_path = fixture_dir().join("eval_sample.json");
        let cases = load_eval_cases_from_json(&sample_path);
        assert_eq!(cases.len(), 3);

        // First entry: every field is checked.
        let first = &cases[0];
        assert_eq!(first.query, "retry with exponential backoff");
        assert_eq!(first.expected, "retry_with_backoff");
        assert_eq!(first.language, "rust");
        assert!(first.also_accept.is_empty());

        // Second entry: only the expected name is checked.
        let second = &cases[1];
        assert_eq!(second.expected, "validate_email");

        // Third entry: language and (empty) alternative answers.
        let third = &cases[2];
        assert_eq!(third.language, "rust");
        assert!(third.also_accept.is_empty());
    }

    /// A case whose `language` field is `"rust"` must report `Language::Rust`
    /// from `parsed_language()`.
    #[test]
    fn test_owned_eval_case_parsed_language() {
        let rust_case = OwnedEvalCase {
            query: "test".into(),
            expected: "test_fn".into(),
            language: "rust".into(),
            also_accept: Vec::new(),
        };
        let parsed = rust_case.parsed_language();
        assert_eq!(parsed, Language::Rust);
    }

    /// Loading from a path that does not exist is expected to panic with a
    /// message containing "Failed to read eval file".
    #[test]
    #[should_panic(expected = "Failed to read eval file")]
    fn test_load_eval_cases_missing_file() {
        let missing = Path::new("/nonexistent/path.json");
        load_eval_cases_from_json(missing);
    }

    /// Loading a file whose contents are not JSON is expected to panic with a
    /// message containing "Invalid JSON".
    #[test]
    #[should_panic(expected = "Invalid JSON")]
    fn test_load_eval_cases_invalid_json() {
        // The temporary directory must stay bound for the whole test so the
        // file written below still exists when it is loaded.
        let scratch = tempfile::TempDir::new().unwrap();
        let bad_file = scratch.path().join("bad.json");
        std::fs::write(&bad_file, "not json").unwrap();
        load_eval_cases_from_json(&bad_file);
    }
}