use rand::Rng;
/// Pool of candidate property names, grouped by theme.
///
/// `pick_names` flattens every group into a single pool, and `type_for_name`
/// decides which schema type a chosen name receives.
const PROPERTY_NAMES: &[&[&str]] = &[
    // Free-form text fields.
    &[
        "text",
        "content",
        "body",
        "message",
        "summary",
        "description",
        "title",
        "headline",
        "caption",
        "excerpt",
    ],
    // Identifiers and classification.
    &[
        "name", "label", "id", "slug", "key", "code", "tag", "category", "kind",
    ],
    // Queries and instructions.
    &[
        "query",
        "prompt",
        "question",
        "topic",
        "subject",
        "criteria",
        "instruction",
    ],
    // Provenance and metadata.
    &[
        "author", "source", "url", "language", "format", "version", "status",
    ],
    // Numeric measures.
    &[
        "score",
        "rating",
        "count",
        "weight",
        "priority",
        "rank",
        "threshold",
        "limit",
    ],
    // Flags.
    &[
        "is_draft",
        "is_active",
        "enabled",
        "verified",
        "flagged",
        "approved",
    ],
    // Images.
    &["image", "photo", "thumbnail", "avatar", "icon", "banner"],
    // Audio.
    &["audio", "recording", "clip", "track", "voice"],
    // Video.
    &["video", "footage", "stream", "animation"],
    // Generic files.
    &["file", "document", "attachment", "upload"],
    // Collections.
    &[
        "tags",
        "keywords",
        "labels",
        "categories",
        "items",
        "entries",
        "values",
    ],
];
/// The schema type assigned to a generated property.
///
/// `Debug` is derived so values can be printed with `{:?}` and compared with
/// `assert_eq!` in diagnostics and tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum PropType {
    /// Serialized as `{"type": "string"}`.
    String,
    /// Serialized as `{"type": "number"}`.
    Number,
    /// Serialized as `{"type": "integer"}`.
    Integer,
    /// Serialized as `{"type": "boolean"}`.
    Boolean,
    /// Serialized as `{"type": "image"}`.
    Image,
    /// Serialized as `{"type": "audio"}`.
    Audio,
    /// Serialized as `{"type": "video"}`.
    Video,
    /// Serialized as `{"type": "file"}`.
    File,
    /// Serialized as `{"type": "array", "items": {"type": "string"}}`.
    StringArray,
}
impl PropType {
    /// Builds the JSON schema fragment for this type.
    ///
    /// Every variant yields `{"type": <name>}`; `StringArray` additionally
    /// carries an `items` entry describing its string elements.
    fn to_json(self) -> serde_json::Value {
        let type_name = match self {
            // The only variant that needs more than a bare type name.
            PropType::StringArray => {
                return serde_json::json!({"type": "array", "items": {"type": "string"}});
            }
            PropType::String => "string",
            PropType::Number => "number",
            PropType::Integer => "integer",
            PropType::Boolean => "boolean",
            PropType::Image => "image",
            PropType::Audio => "audio",
            PropType::Video => "video",
            PropType::File => "file",
        };
        serde_json::json!({ "type": type_name })
    }

    /// Returns `true` for the scalar types `String`, `Number`, `Integer` and
    /// `Boolean`. `StringArray` and the media types are not textual.
    pub(super) fn is_textual(self) -> bool {
        match self {
            PropType::String | PropType::Number | PropType::Integer | PropType::Boolean => true,
            PropType::Image
            | PropType::Audio
            | PropType::Video
            | PropType::File
            | PropType::StringArray => false,
        }
    }

    /// Returns `true` for `Image`, `Audio`, `Video` and `File`.
    pub(super) fn is_media(self) -> bool {
        match self {
            PropType::Image | PropType::Audio | PropType::Video | PropType::File => true,
            PropType::String
            | PropType::Number
            | PropType::Integer
            | PropType::Boolean
            | PropType::StringArray => false,
        }
    }
}
/// Chooses a [`PropType`] for a property called `name`.
///
/// Names with an obvious meaning map to a fixed type:
///
/// * image / audio / video / file names map to the matching media type;
/// * names starting with `is_`, plus `enabled`, `verified`, `flagged` and
///   `approved`, map to `Boolean`;
/// * numeric names (`score`, `count`, ...) map to `Number` or `Integer`,
///   chosen by one draw from `rng`;
/// * collection names (`tags`, `items`, ...) map to `StringArray`.
///
/// Any other name consumes one draw from `rng` over `0..10`: the values 0, 1
/// and 2 select `Number`, `Integer` and `Boolean` respectively, and every
/// other value selects `String`.
///
/// `"items"` is treated as a collection name like the rest of its group in
/// `PROPERTY_NAMES`; it was previously missing from the list and fell through
/// to the random scalar fallback.
pub(super) fn type_for_name(name: &str, rng: &mut impl Rng) -> PropType {
    match name {
        "image" | "photo" | "thumbnail" | "avatar" | "icon" | "banner" => PropType::Image,
        "audio" | "recording" | "clip" | "track" | "voice" => PropType::Audio,
        "video" | "footage" | "stream" | "animation" => PropType::Video,
        "file" | "document" | "attachment" | "upload" => PropType::File,
        _ if name.starts_with("is_") => PropType::Boolean,
        "enabled" | "verified" | "flagged" | "approved" => PropType::Boolean,
        "score" | "rating" | "count" | "weight" | "priority" | "rank" | "threshold" | "limit" => {
            if rng.random_range(0u32..2) == 0 {
                PropType::Number
            } else {
                PropType::Integer
            }
        }
        "tags" | "keywords" | "labels" | "categories" | "items" | "entries" | "values" => {
            PropType::StringArray
        }
        // Unrecognized names are mostly strings, with occasional other scalars.
        _ => match rng.random_range(0u32..10) {
            0 => PropType::Number,
            1 => PropType::Integer,
            2 => PropType::Boolean,
            _ => PropType::String,
        },
    }
}
/// Draws up to `n` distinct names from all groups of `PROPERTY_NAMES`.
///
/// Runs the first `n` steps of a Fisher–Yates shuffle over the flattened pool
/// and returns the shuffled prefix. If `n` exceeds the pool size, every name
/// is returned (in shuffled order).
fn pick_names(n: usize, rng: &mut impl Rng) -> Vec<&'static str> {
    let mut pool: Vec<&'static str> = PROPERTY_NAMES
        .iter()
        .flat_map(|group| group.iter().copied())
        .collect();

    let wanted = n.min(pool.len());
    for slot in 0..wanted {
        // Swap a random not-yet-chosen name into the current slot.
        let pick = rng.random_range(slot..pool.len());
        pool.swap(slot, pick);
    }

    pool.truncate(wanted);
    pool
}
/// Generates a random object schema and returns it as a JSON string.
///
/// The schema has between one and five properties. Each property's type comes
/// from `type_for_name`, and a property receives a `"The {name}"` description
/// when a draw over `0..3` yields 0.
///
/// The `required` list always contains:
/// * a random-length prefix (at least one entry) of the chosen names,
/// * every media-typed property, and
/// * at least one textual property. If no property is textual, the first
///   property is replaced with a plain `{"type": "string"}` schema.
pub fn random_scalar_input_schema(rng: &mut impl Rng) -> String {
    let prop_count = rng.random_range(1u32..=5) as usize;
    let names = pick_names(prop_count, rng);

    let mut properties = serde_json::Map::new();
    let mut types: Vec<PropType> = Vec::with_capacity(prop_count);
    for &name in &names {
        let prop_type = type_for_name(name, rng);
        types.push(prop_type);

        let mut prop_schema = prop_type.to_json();
        let describe = rng.random_range(0u32..3) == 0;
        if describe {
            // `to_json` always returns an object, so indexing inserts the key.
            prop_schema["description"] = serde_json::Value::String(format!("The {name}"));
        }
        properties.insert(name.to_string(), prop_schema);
    }

    // Start with a random-length prefix, then force every media property in.
    let required_count = rng.random_range(1..=names.len());
    let mut required: Vec<&str> = names.iter().take(required_count).copied().collect();
    for (&name, prop_type) in names.iter().zip(&types) {
        if prop_type.is_media() && !required.contains(&name) {
            required.push(name);
        }
    }

    let has_textual = required.iter().any(|&wanted| {
        match names.iter().position(|&candidate| candidate == wanted) {
            Some(idx) => types[idx].is_textual(),
            None => false,
        }
    });

    if !has_textual {
        match types.iter().position(|t| t.is_textual()) {
            // Promote an existing textual property to required.
            Some(idx) => {
                if !required.contains(&names[idx]) {
                    required.push(names[idx]);
                }
            }
            // No textual property at all: turn the first one (always required,
            // since the prefix is non-empty) into a plain string.
            None => {
                properties.insert(names[0].to_string(), serde_json::json!({"type": "string"}));
                types[0] = PropType::String;
            }
        }
    }

    serde_json::json!({
        "type": "object",
        "properties": properties,
        "required": required,
    })
    .to_string()
}
/// Generates a random vector-input schema and returns it as a JSON string.
///
/// The top-level object always has an `items` entry and, when a draw over
/// `0..2` yields 0, also a `context` entry holding an object schema with one
/// to three properties.
///
/// `items` is chosen by a draw over `0..5`: 0 gives a described string, 1 a
/// described image, and anything else an object schema with one to four
/// properties.
pub fn random_vector_input_schema(rng: &mut impl Rng) -> String {
    let mut schema = serde_json::Map::new();

    let include_context = rng.random_range(0u32..2) == 0;
    if include_context {
        schema.insert("context".into(), random_object_schema(3, rng));
    }

    let items_schema = match rng.random_range(0u32..5) {
        0 => serde_json::json!({"type": "string", "description": random_item_description(rng)}),
        1 => serde_json::json!({"type": "image", "description": random_item_description(rng)}),
        _ => random_object_schema(4, rng),
    };
    schema.insert("items".into(), items_schema);

    serde_json::Value::Object(schema).to_string()
}

/// Builds an object schema with between one and `max_props` random properties.
///
/// `required` holds a random-length prefix (at least one entry) of the chosen
/// names, followed by any media-typed property not already in that prefix.
fn random_object_schema(max_props: u32, rng: &mut impl Rng) -> serde_json::Value {
    let prop_count = rng.random_range(1u32..=max_props) as usize;
    let names = pick_names(prop_count, rng);

    let mut types = Vec::with_capacity(prop_count);
    let mut props = serde_json::Map::new();
    for &name in &names {
        let prop_type = type_for_name(name, rng);
        types.push(prop_type);
        props.insert(name.to_string(), prop_type.to_json());
    }

    let required_count = rng.random_range(1..=names.len());
    let mut required: Vec<&str> = names.iter().take(required_count).copied().collect();
    for (&name, prop_type) in names.iter().zip(&types) {
        if prop_type.is_media() && !required.contains(&name) {
            required.push(name);
        }
    }

    serde_json::json!({
        "type": "object",
        "properties": props,
        "required": required,
    })
}
/// Returns one of eight canned descriptions for a vector item, selected by a
/// single draw from `rng`.
fn random_item_description(rng: &mut impl Rng) -> &'static str {
    const ITEM_DESCRIPTIONS: [&str; 8] = [
        "An item to rank",
        "A candidate result",
        "An entry to evaluate",
        "A response option",
        "A submission to score",
        "An alternative to compare",
        "A candidate to assess",
        "A sample to judge",
    ];

    let choice = rng.random_range(0..ITEM_DESCRIPTIONS.len());
    ITEM_DESCRIPTIONS[choice]
}