use serde::Deserialize;
use serde::de::{self, Deserializer};
use serde_json::{Map, Value};
use crw_core::types::{ChangeTrackingMode, ChangeTrackingOptions, OutputFormat};
/// One raw entry of a request's `formats` array, before it is interpreted.
///
/// An entry is either a bare format name or a JSON object; any other JSON
/// type is rejected while deserializing.
#[derive(Debug, Clone)]
pub enum FormatSpec {
    /// A bare format name, e.g. `"markdown"`.
    String(String),
    /// An object entry; `decompose` requires it to carry a string `type` key.
    Object(Map<String, Value>),
}
impl<'de> Deserialize<'de> for FormatSpec {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
match Value::deserialize(deserializer)? {
Value::String(s) => Ok(FormatSpec::String(s)),
Value::Object(m) => Ok(FormatSpec::Object(m)),
other => Err(de::Error::custom(format!(
"each entry of `formats` must be a string or an object with a `type` field, got {}",
json_kind(&other)
))),
}
}
}
/// Returns a short English name (with article where applicable) for the JSON
/// type of `v`, suitable for embedding in an error message.
fn json_kind(v: &Value) -> &'static str {
    match v {
        Value::Object(_) => "an object",
        Value::Array(_) => "an array",
        Value::String(_) => "a string",
        Value::Number(_) => "a number",
        Value::Bool(_) => "a boolean",
        Value::Null => "null",
    }
}
/// The result of splitting a `formats` array into its typed parts.
#[derive(Debug, Default)]
pub struct DecomposedFormats {
    /// Recognised output formats, without duplicates, in first-seen order.
    pub formats: Vec<OutputFormat>,
    /// The `schema` value taken from a `json` object entry, if one supplied it.
    pub json_schema: Option<Value>,
    /// Options recorded when a change-tracking format was requested.
    pub change_tracking: Option<ChangeTrackingOptions>,
    /// Set when a `screenshot` entry was present.
    pub screenshot_requested: bool,
    /// Names of requested formats that are accepted but not produced.
    pub unsupported: Vec<String>,
}
/// Interprets every entry of `specs` and collects the outcome into a
/// [`DecomposedFormats`].
///
/// After all entries are processed, `Markdown` is appended to the format
/// list when change tracking was requested without it, or when no supported
/// format was requested at all.
///
/// # Errors
///
/// Returns an error message when an object entry has no string `type` field,
/// or when `handle_token` rejects an entry.
pub fn decompose(specs: &[FormatSpec]) -> Result<DecomposedFormats, String> {
    let mut decomposed = DecomposedFormats::default();
    let mut saw_screenshot = false;

    for entry in specs {
        // Reduce both entry shapes to (token, optional object) first so the
        // token handling has a single call site.
        let (token, fields) = match entry {
            FormatSpec::String(name) => (name.as_str(), None),
            FormatSpec::Object(map) => {
                let Some(ty) = map.get("type").and_then(Value::as_str) else {
                    return Err("each `formats` object requires a string `type` field".to_string());
                };
                (ty, Some(map))
            }
        };
        handle_token(token, fields, &mut decomposed, &mut saw_screenshot)?;
    }

    // The two fallbacks are mutually exclusive (an empty list cannot contain
    // ChangeTracking), so at most one Markdown entry is ever appended.
    let formats = &mut decomposed.formats;
    let tracking_without_markdown = formats.contains(&OutputFormat::ChangeTracking)
        && !formats.contains(&OutputFormat::Markdown);
    if formats.is_empty() || tracking_without_markdown {
        formats.push(OutputFormat::Markdown);
    }

    Ok(decomposed)
}
/// Applies a single `formats` token to the accumulator `out`.
///
/// * `ty` – the format name (the bare string, or the object's `type` value).
/// * `obj` – the full object entry when the token came from an object,
///   `None` for a bare string.
/// * `out` – accumulator that receives the interpreted result.
/// * `screenshot_seen` – set once a `screenshot` token has been handled, so a
///   second one can be rejected.
///
/// Tokens fall into three groups: `screenshot` (recorded as requested and as
/// unsupported), a fixed list of other unsupported names, and everything
/// else, which is handed to `OutputFormat::parse_loose`.
///
/// # Errors
///
/// Returns an error message when a second `screenshot` token is seen, when
/// `OutputFormat::parse_loose` rejects the token, or when the change-tracking
/// options of an object entry fail to parse.
fn handle_token(
    ty: &str,
    obj: Option<&Map<String, Value>>,
    out: &mut DecomposedFormats,
    screenshot_seen: &mut bool,
) -> Result<(), String> {
    match ty {
        "screenshot" => {
            if *screenshot_seen {
                return Err("only one screenshot format allowed per request".to_string());
            }
            *screenshot_seen = true;
            out.screenshot_requested = true;
            out.unsupported.push("screenshot".to_string());
            Ok(())
        }
        "images" | "attributes" | "branding" | "audio" | "query" => {
            // Record each unsupported name once, mirroring the de-duplication
            // of supported formats below, so the warning does not repeat it.
            if !out.unsupported.iter().any(|seen| seen.as_str() == ty) {
                out.unsupported.push(ty.to_string());
            }
            Ok(())
        }
        _ => {
            let fmt = OutputFormat::parse_loose(ty)?;
            if !out.formats.contains(&fmt) {
                out.formats.push(fmt);
            }
            // Only an object entry that actually carries `schema` updates the
            // stored JSON schema; a bare "json" leaves an earlier one intact.
            if fmt == OutputFormat::Json
                && let Some(m) = obj
                && let Some(schema) = m.get("schema")
            {
                out.json_schema = Some(schema.clone());
            }
            if fmt == OutputFormat::ChangeTracking {
                match obj {
                    // An object entry carries explicit options and always wins.
                    Some(m) => out.change_tracking = Some(parse_change_tracking(m)?),
                    // A bare token only supplies defaults; it must not wipe
                    // options configured by an earlier object entry.
                    None if out.change_tracking.is_none() => {
                        out.change_tracking = Some(ChangeTrackingOptions {
                            modes: vec![ChangeTrackingMode::GitDiff],
                            ..Default::default()
                        });
                    }
                    None => {}
                }
            }
            Ok(())
        }
    }
}
/// Builds [`ChangeTrackingOptions`] from a change-tracking object entry.
///
/// `modes` falls back to a single `GitDiff` mode when the key is absent or
/// the list is empty. `schema` is copied as-is; `prompt` and `tag` are taken
/// only when they are JSON strings. `previous` and `content_type` are left
/// unset.
///
/// # Errors
///
/// Returns an error message when `modes` is present but cannot be
/// deserialized into a list of change-tracking modes.
fn parse_change_tracking(m: &Map<String, Value>) -> Result<ChangeTrackingOptions, String> {
    // Optional string fields: anything that is not a JSON string reads as None.
    let string_field = |key: &str| m.get(key).and_then(Value::as_str).map(str::to_string);

    let mut modes: Vec<ChangeTrackingMode> = match m.get("modes") {
        None => Vec::new(),
        Some(raw) => serde_json::from_value(raw.clone())
            .map_err(|e| format!("invalid changeTracking modes: {e}"))?,
    };
    if modes.is_empty() {
        modes.push(ChangeTrackingMode::GitDiff);
    }

    Ok(ChangeTrackingOptions {
        modes,
        schema: m.get("schema").cloned(),
        prompt: string_field("prompt"),
        previous: None,
        tag: string_field("tag"),
        content_type: None,
    })
}
/// Formats a single warning line listing the unsupported format names.
///
/// Returns `None` when `unsupported` is empty; otherwise the names are joined
/// with `", "` and appended to a fixed explanatory sentence.
pub fn unsupported_warning(unsupported: &[String]) -> Option<String> {
    (!unsupported.is_empty()).then(|| {
        let listed = unsupported.join(", ");
        format!(
            "the following requested formats are not yet produced by this engine and were ignored: {}",
            listed
        )
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Deserializes a JSON array literal into the list of specs under test.
    fn specs(v: serde_json::Value) -> Vec<FormatSpec> {
        serde_json::from_value(v).expect("formats parse")
    }

    /// Bare string entries map to formats in request order.
    #[test]
    fn bare_strings_parse() {
        let input = specs(json!(["markdown", "html", "links"]));
        let d = decompose(&input).unwrap();
        let expected = vec![
            OutputFormat::Markdown,
            OutputFormat::Html,
            OutputFormat::Links,
        ];
        assert_eq!(d.formats, expected);
    }

    /// The `schema` of a `json` object entry ends up in `json_schema`.
    #[test]
    fn object_json_lifts_schema() {
        let schema = json!({"type": "object", "properties": {"title": {"type": "string"}}});
        let input = specs(json!([
            {"type": "json", "schema": schema.clone()},
            {"type": "summary"}
        ]));
        let d = decompose(&input).unwrap();
        for wanted in [OutputFormat::Json, OutputFormat::Summary] {
            assert!(d.formats.contains(&wanted));
        }
        assert_eq!(d.json_schema.as_ref(), Some(&schema));
    }

    /// String and object entries can be mixed in one array.
    #[test]
    fn mixed_string_and_object() {
        let input = specs(json!([
            "markdown",
            {"type": "json", "schema": {"type": "object"}}
        ]));
        let d = decompose(&input).unwrap();
        for wanted in [OutputFormat::Markdown, OutputFormat::Json] {
            assert!(d.formats.contains(&wanted));
        }
    }

    /// A second screenshot entry is an error.
    #[test]
    fn two_screenshots_rejected() {
        let input = specs(json!([
            {"type": "screenshot"},
            {"type": "screenshot", "fullPage": true}
        ]));
        let err = decompose(&input).unwrap_err();
        assert!(err.contains("only one screenshot"));
    }

    /// Requesting change tracking alone also yields Markdown.
    #[test]
    fn change_tracking_auto_adds_markdown() {
        let input = specs(json!([{"type": "changeTracking", "modes": ["gitDiff"]}]));
        let d = decompose(&input).unwrap();
        for wanted in [OutputFormat::Markdown, OutputFormat::ChangeTracking] {
            assert!(d.formats.contains(&wanted));
        }
        assert!(d.change_tracking.is_some());
    }

    /// Unsupported names are collected and produce a warning, not an error.
    #[test]
    fn unsupported_formats_collected_not_fatal() {
        let input = specs(json!(["markdown", {"type": "images"}, {"type": "screenshot"}]));
        let d = decompose(&input).unwrap();
        assert!(d.formats.contains(&OutputFormat::Markdown));
        for name in ["images", "screenshot"] {
            assert!(d.unsupported.contains(&name.to_string()));
        }
        assert!(unsupported_warning(&d.unsupported).is_some());
    }

    /// An object entry without `type` is rejected.
    #[test]
    fn object_missing_type_errors() {
        let input = specs(json!([{"schema": {}}]));
        let err = decompose(&input).unwrap_err();
        assert!(err.contains("type"));
    }

    /// Unknown names surface the parser's "Unknown format" wording.
    #[test]
    fn unknown_format_errors_with_v1_wording() {
        let input = specs(json!(["bogus"]));
        let err = decompose(&input).unwrap_err();
        assert!(err.contains("Unknown format 'bogus'"));
    }

    /// With no entries at all, Markdown is the sole format.
    #[test]
    fn empty_formats_default_to_markdown() {
        let d = decompose(&[]).unwrap();
        assert_eq!(d.formats, vec![OutputFormat::Markdown]);
    }
}