use std::io::{self, BufReader, IsTerminal, Read};
use std::path::{Path, PathBuf};
use anyhow::{Context as _, Result, bail};
use clap::{CommandFactory, Parser};
use schema_analysis::context::{Context, DefaultContext};
use schema_analysis::helpers::xml::cleanup_xml_schema;
use schema_analysis::targets::json_typegen::OutputMode;
use schema_analysis::targets::schemars::JsonSchemaVersion;
use schema_analysis::{Coalesce, InferredSchema, Schema};
use serde::Serialize;
use serde::de::DeserializeSeed;
// Command-line arguments for the `schema_analysis` binary, parsed via clap's derive API.
//
// NOTE: plain `//` comments are used on purpose. clap's derive turns `///` doc comments on
// the struct and its fields into help text, so adding them would change `--help` output.
#[derive(Parser)]
#[command(
name = "schema_analysis",
version,
about = "Infer schemas from any Serde-compatible format"
)]
struct Cli {
// Positional input files. When empty, `infer_schema` reads from stdin instead (and `main`
// prints the help text if stdin is a terminal).
files: Vec<PathBuf>,
// Explicit input format. When absent, `resolve_format` derives it from the first file's
// extension, or falls back to JSON when there are no files.
#[arg(long, value_enum)]
format: Option<InputFormat>,
// Which representation `generate_output` renders; defaults to the raw schema.
#[arg(long, value_enum, default_value = "schema")]
output: Output,
// Name handed to the json_typegen-based targets (`process_with_json_typegen_options`).
#[arg(long, default_value = "Root")]
name: String,
// Only consulted for `Output::Schema`: selects `serde_json::to_string` instead of
// `serde_json::to_string_pretty`.
#[arg(long)]
compact: bool,
// Infers with the unit context `()` instead of `DefaultContext` (see `main`).
#[arg(long)]
minimal: bool,
}
// Input formats the CLI can parse. Each variant selects one parser in `deserialize` and
// `merge`; `detect_format` maps file extensions onto these variants.
//
// Kept as plain `//` comments: with `clap::ValueEnum`, `///` on variants becomes help text.
#[derive(Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
enum InputFormat {
// Parsed with `serde_json`; also the default when reading stdin without `--format`.
Json,
// Parsed with `serde_yaml`; detected from `.yaml` and `.yml`.
Yaml,
// Parsed with `quick_xml`; `main` additionally runs `cleanup_xml_schema` on the result.
Xml,
// Parsed with `toml` after reading the whole input into a string.
Toml,
// Parsed with `serde_cbor`.
Cbor,
// Parsed with `bson`.
Bson,
}
// Output representations selectable with `--output`; consumed by `generate_output`.
//
// Kept as plain `//` comments: with `clap::ValueEnum`, `///` on variants becomes help text.
#[derive(Clone, Copy, clap::ValueEnum)]
enum Output {
// The inferred schema serialized as JSON (pretty-printed unless `--compact`).
Schema,
// json_typegen target `OutputMode::Rust`.
Rust,
// json_typegen target `OutputMode::Typescript`.
Typescript,
// json_typegen target `OutputMode::TypescriptTypeAlias`.
TypescriptAlias,
// json_typegen target `OutputMode::KotlinJackson`.
Kotlin,
// json_typegen target `OutputMode::KotlinKotlinx`.
KotlinKotlinx,
// JSON Schema generated through the schemars target (`JsonSchemaVersion::Draft2020_12`).
JsonSchema,
// json_typegen target `OutputMode::Shape`.
Shape,
}
fn main() -> Result<()> {
let cli = Cli::parse();
if cli.files.is_empty() && io::stdin().is_terminal() {
Cli::command().print_help()?;
return Ok(());
}
let format = resolve_format(&cli)?;
let output = if cli.minimal {
let mut schema = infer_schema::<()>(format, &cli.files)?;
if format == InputFormat::Xml {
cleanup_xml_schema(&mut schema);
}
generate_output(&schema, &cli)?
} else {
let mut schema = infer_schema::<DefaultContext>(format, &cli.files)?;
if format == InputFormat::Xml {
cleanup_xml_schema(&mut schema);
}
generate_output(&schema, &cli)?
};
println!("{output}");
Ok(())
}
/// Decides which input format to parse with.
///
/// Precedence: an explicit `--format` wins; otherwise the extension of the first input file
/// is used; with no files at all (stdin input) the format defaults to JSON.
///
/// # Errors
///
/// Propagates the error from [`detect_format`] when the first file's extension is missing
/// or unrecognized.
fn resolve_format(cli: &Cli) -> Result<InputFormat> {
    match (cli.format, cli.files.first()) {
        (Some(explicit), _) => Ok(explicit),
        (None, Some(first_file)) => detect_format(first_file),
        (None, None) => Ok(InputFormat::Json),
    }
}
fn detect_format(path: &Path) -> Result<InputFormat> {
let ext = path
.extension()
.and_then(|e| e.to_str())
.map(|e| e.to_ascii_lowercase());
match ext.as_deref() {
Some("json") => Ok(InputFormat::Json),
Some("yaml" | "yml") => Ok(InputFormat::Yaml),
Some("xml") => Ok(InputFormat::Xml),
Some("toml") => Ok(InputFormat::Toml),
Some("cbor") => Ok(InputFormat::Cbor),
Some("bson") => Ok(InputFormat::Bson),
Some(other) => bail!(
"Unrecognized file extension '.{other}'. Use --format to specify the input format.\n\
Supported formats: json, yaml, xml, toml, cbor, bson"
),
None => bail!(
"File '{}' has no extension. Use --format to specify the input format.",
path.display()
),
}
}
/// Infers a single schema from stdin (when `files` is empty) or from all of `files`.
///
/// The first file is deserialized into a fresh [`InferredSchema`]; every following file is
/// folded into that same value through [`merge`]. Files are opened and parsed in order, one
/// at a time.
///
/// # Errors
///
/// Fails if a file cannot be opened or if any input fails to parse; the error context names
/// the offending file (or stdin).
fn infer_schema<C: Context + Default>(format: InputFormat, files: &[PathBuf]) -> Result<Schema<C>>
where
    Schema<C>: Coalesce,
{
    if files.is_empty() {
        let stdin = BufReader::new(io::stdin().lock());
        let from_stdin: InferredSchema<C> =
            deserialize(format, stdin).context("Failed to parse stdin")?;
        return Ok(from_stdin.schema);
    }

    let (first, rest) = files.split_first().expect("files is non-empty");

    // Seed the accumulator with the first file...
    let first_reader = open_file(first)?;
    let mut accumulated: InferredSchema<C> = deserialize(format, first_reader)
        .with_context(|| format!("Failed to parse '{}'", first.display()))?;

    // ...then merge each remaining file into it.
    for path in rest {
        let reader = open_file(path)?;
        merge(format, &mut accumulated, reader)
            .with_context(|| format!("Failed to parse '{}'", path.display()))?;
    }

    Ok(accumulated.schema)
}
fn open_file(path: &Path) -> Result<BufReader<std::fs::File>> {
let file = std::fs::File::open(path)
.with_context(|| format!("Failed to open '{}'", path.display()))?;
Ok(BufReader::new(file))
}
/// Parses one complete input in the given `format` into a fresh [`InferredSchema`].
///
/// JSON, CBOR and XML are parsed straight from the reader; YAML, BSON and TOML are read
/// fully into memory first and parsed from that buffer.
///
/// # Errors
///
/// Fails when the input cannot be read or is not valid for `format`; parse errors carry a
/// "Failed to parse <FORMAT>" context.
fn deserialize<C: Context + Default>(
    format: InputFormat,
    reader: impl Read,
) -> Result<InferredSchema<C>> {
    let inferred: InferredSchema<C> = match format {
        // Streaming parsers.
        InputFormat::Json => serde_json::from_reader(reader).context("Failed to parse JSON")?,
        InputFormat::Cbor => serde_cbor::from_reader(reader).context("Failed to parse CBOR")?,
        InputFormat::Xml => {
            let buffered = BufReader::new(reader);
            quick_xml::de::from_reader(buffered).context("Failed to parse XML")?
        }
        // Parsers fed from an in-memory copy of the input.
        InputFormat::Yaml => {
            let raw = read_all(reader)?;
            serde_yaml::from_slice(&raw).context("Failed to parse YAML")?
        }
        InputFormat::Bson => {
            let raw = read_all(reader)?;
            bson::from_slice(&raw).context("Failed to parse BSON")?
        }
        InputFormat::Toml => {
            let text = read_all_string(reader)?;
            toml::from_str(&text).context("Failed to parse TOML")?
        }
    };
    Ok(inferred)
}
/// Parses `reader` as `format` and folds the result into an already-inferred schema.
///
/// `inferred` is used as a `DeserializeSeed`, so the existing schema is updated in place
/// rather than replaced.
///
/// Note that the YAML arm visits every document in the stream, whereas the other arms parse
/// a single value.
///
/// # Errors
///
/// Fails when the input cannot be read or is not valid for `format`. Every parse error
/// carries the same "Failed to parse <FORMAT>" context that [`deserialize`] attaches, so
/// messages look the same regardless of whether a file was the first or a later input.
/// For JSON and CBOR, data left over after the first value is also an error, matching what
/// `serde_json::from_reader` / `serde_cbor::from_reader` enforce for the first input.
fn merge<C: Context>(
    format: InputFormat,
    inferred: &mut InferredSchema<C>,
    reader: impl Read,
) -> Result<()>
where
    Schema<C>: Coalesce,
{
    match format {
        InputFormat::Json => {
            let mut de = serde_json::Deserializer::from_reader(reader);
            inferred
                .deserialize(&mut de)
                .context("Failed to parse JSON")?;
            // A hand-built deserializer does not check for trailing input on its own.
            de.end().context("Failed to parse JSON")?;
        }
        InputFormat::Cbor => {
            let mut de = serde_cbor::Deserializer::from_reader(reader);
            inferred
                .deserialize(&mut de)
                .context("Failed to parse CBOR")?;
            // Same trailing-data check as for JSON.
            de.end().context("Failed to parse CBOR")?;
        }
        InputFormat::Yaml => {
            let bytes = read_all(reader)?;
            for document in serde_yaml::Deserializer::from_slice(&bytes) {
                inferred
                    .deserialize(document)
                    .context("Failed to parse YAML")?;
            }
        }
        InputFormat::Xml => {
            let mut de = quick_xml::de::Deserializer::from_reader(BufReader::new(reader));
            inferred
                .deserialize(&mut de)
                .context("Failed to parse XML")?;
        }
        InputFormat::Toml => {
            let s = read_all_string(reader)?;
            let de = toml::Deserializer::parse(&s).context("Failed to parse TOML")?;
            inferred.deserialize(de).context("Failed to parse TOML")?;
        }
        InputFormat::Bson => {
            let doc = bson::Document::from_reader(reader).context("Failed to parse BSON")?;
            let de = bson::Deserializer::new(bson::Bson::Document(doc));
            inferred.deserialize(de).context("Failed to parse BSON")?;
        }
    }
    Ok(())
}
/// Drains `reader` to its end and returns everything that was read as raw bytes.
///
/// # Errors
///
/// Propagates any I/O error reported by the reader.
fn read_all(mut reader: impl Read) -> Result<Vec<u8>> {
    let mut contents = Vec::new();
    match reader.read_to_end(&mut contents) {
        Ok(_) => Ok(contents),
        Err(err) => Err(err.into()),
    }
}
/// Drains `reader` to its end and returns the contents as a `String`.
///
/// # Errors
///
/// Propagates any error from `read_to_string`, which includes I/O failures and input that
/// is not valid UTF-8.
fn read_all_string(mut reader: impl Read) -> Result<String> {
    let mut text = String::new();
    match reader.read_to_string(&mut text) {
        Ok(_) => Ok(text),
        Err(err) => Err(err.into()),
    }
}
/// Renders `schema` in the representation selected by `cli.output`.
///
/// * `Output::Schema` serializes the schema itself as JSON, compact or pretty depending on
///   `cli.compact`.
/// * `Output::JsonSchema` goes through the schemars target using draft 2020-12.
/// * Every other variant is mapped to a json_typegen `OutputMode` and generated with
///   `cli.name` passed as the name argument.
///
/// # Errors
///
/// Fails when serialization or code generation fails. json_typegen errors are re-created
/// from their display text.
fn generate_output<C: Context>(schema: &Schema<C>, cli: &Cli) -> Result<String>
where
    Schema<C>: Serialize,
{
    // The two non-typegen outputs return early; everything else resolves to a typegen mode.
    let mode = match cli.output {
        Output::Schema => {
            let rendered = if cli.compact {
                serde_json::to_string(schema)
            } else {
                serde_json::to_string_pretty(schema)
            };
            return rendered.context("Failed to serialize schema");
        }
        Output::JsonSchema => {
            return schema
                .to_json_schema_with_schemars_version(&JsonSchemaVersion::Draft2020_12)
                .context("Failed to generate JSON Schema");
        }
        Output::Rust => OutputMode::Rust,
        Output::Typescript => OutputMode::Typescript,
        Output::TypescriptAlias => OutputMode::TypescriptTypeAlias,
        Output::Kotlin => OutputMode::KotlinJackson,
        Output::KotlinKotlinx => OutputMode::KotlinKotlinx,
        Output::Shape => OutputMode::Shape,
    };

    let mut typegen_options = schema_analysis::targets::json_typegen::Options::default();
    typegen_options.output_mode = mode;

    schema
        .process_with_json_typegen_options(&cli.name, &typegen_options)
        .map_err(|e| anyhow::anyhow!("{e}"))
}