use std::collections::HashSet;
use std::io::Read;
use std::process::ExitCode;
use clap::{Parser, ValueEnum};
use schemaorg_rs::profiles::{Eligibility, ProfileRegistry, ProfileResult};
use schemaorg_rs::validation::diagnostics::{Severity, ValidationDiagnostic};
use schemaorg_rs::validation::ValidationResult;
use schemaorg_rs::{extract_all, validation, vocabulary};
/// Failures that abort a run of the CLI.
///
/// `main` prints the `Display` text of the error to stderr (prefixed with
/// `Error: `) and exits with status 2.
#[derive(Debug, thiserror::Error)]
enum CliError {
/// An I/O operation failed; produced via `?`, e.g. when reading stdin.
#[error("io error: {0}")]
Io(#[from] std::io::Error),
/// The HTTP request for `--url` failed. The `ureq` error is boxed.
#[error("http error: {0}")]
Http(#[from] Box<ureq::Error>),
/// `extract_all` could not process the input HTML.
#[error("extraction failed: {0}")]
Extraction(#[from] schemaorg_rs::ExtractionError),
/// Evaluating the selected profile failed.
#[error("profile error: {0}")]
Profile(#[from] schemaorg_rs::ProfileError),
/// Free-form message, shown verbatim. Used for unreadable files, unreadable
/// response bodies, a missing input option, and serialization failures.
#[error("{0}")]
InvalidInput(String),
}
// Command-line options for `schemaorg-validate`.
//
// `--file`, `--url` and `--stdin` all belong to the clap group "input". The
// group is not marked required because `--schema-version` needs no input;
// `read_input` reports an error when none of the three is given.
//
// This header is a plain `//` comment on purpose: the `about` text is set
// explicitly below and a `///` comment here would also feed clap's generated
// help. The `///` comments on the fields ARE intentional: clap derive turns
// them into the per-option descriptions shown by `--help`.
#[derive(Parser)]
#[command(
    name = "schemaorg-validate",
    version,
    about = "Validate Schema.org structured data in HTML"
)]
struct Cli {
    /// Read the HTML to validate from this file
    #[arg(long, group = "input")]
    file: Option<std::path::PathBuf>,
    /// Fetch the HTML to validate from this URL
    #[arg(long, group = "input")]
    url: Option<String>,
    /// Read the HTML to validate from standard input
    #[arg(long, group = "input")]
    stdin: bool,
    /// Profile to evaluate in addition to vocabulary validation
    #[arg(long, default_value = "google", value_enum)]
    profile: ProfileChoice,
    /// Format of the report written to standard output
    #[arg(long, default_value = "text", value_enum)]
    format: OutputFormat,
    /// Minimum severity of the diagnostics listed in text output
    #[arg(long, default_value = "warning", value_enum)]
    severity: SeverityFilter,
    /// Disable ANSI colors in text output (the NO_COLOR environment variable is honored as well)
    #[arg(long)]
    no_color: bool,
    /// Print no report; the result is conveyed by the exit status only
    #[arg(long)]
    quiet: bool,
    /// Print the Schema.org vocabulary version and exit
    #[arg(long)]
    schema_version: bool,
}
/// Value of `--profile`: which rule profile is evaluated on top of the
/// vocabulary validation.
///
/// The variant doc comments double as the possible-value descriptions that
/// clap prints in `--help`.
#[derive(Clone, Copy, Debug, ValueEnum)]
enum ProfileChoice {
    /// Evaluate the "google" profile
    Google,
    /// Evaluate the "baseline" profile
    Baseline,
    /// Skip profile evaluation and run vocabulary validation only
    None,
}
/// Value of `--format`: how the report is rendered on standard output.
///
/// The variant doc comments double as the possible-value descriptions that
/// clap prints in `--help`.
#[derive(Clone, Copy, Debug, ValueEnum)]
enum OutputFormat {
    /// Human-readable report, optionally colored
    Text,
    /// Pretty-printed JSON document
    Json,
    /// Pretty-printed SARIF document
    Sarif,
}
/// Value of `--severity`: the least severe diagnostics that are still listed
/// in text output.
///
/// The variant doc comments double as the possible-value descriptions that
/// clap prints in `--help`.
#[derive(Clone, Copy, Debug, ValueEnum)]
enum SeverityFilter {
    /// List errors only
    Error,
    /// List errors and warnings
    Warning,
    /// List every diagnostic, including informational ones
    Info,
}
impl SeverityFilter {
const fn passes(&self, severity: Severity) -> bool {
match self {
Self::Error => matches!(severity, Severity::Error),
Self::Warning => matches!(severity, Severity::Error | Severity::Warning),
Self::Info => true,
}
}
}
/// Program entry point.
///
/// Exit status:
/// * `0` — validation finished without errors, or `--schema-version` was
///   requested (the version is printed and nothing else runs);
/// * `1` — validation finished and `run` reported errors;
/// * `2` — the run itself failed; the error is printed to stderr.
fn main() -> ExitCode {
    let args = Cli::parse();

    // `--schema-version` short-circuits everything else and needs no input.
    if args.schema_version {
        println!("Schema.org v{}", vocabulary::schema_version());
        return ExitCode::SUCCESS;
    }

    match run(&args) {
        Ok(false) => ExitCode::SUCCESS,
        Ok(true) => ExitCode::from(1),
        Err(err) => {
            eprintln!("Error: {err}");
            ExitCode::from(2)
        }
    }
}
/// Executes one validation run: read the input, extract the structured data,
/// validate it, optionally evaluate a profile, and print the report.
///
/// # Returns
///
/// `Ok(true)` when the vocabulary validation reports errors or the evaluated
/// profile's eligibility is `NotEligible`; `Ok(false)` otherwise.
///
/// # Errors
///
/// Propagates failures from reading the input, extraction, profile
/// evaluation, and JSON/SARIF output.
fn run(cli: &Cli) -> Result<bool, CliError> {
    let (html, source_name) = read_input(cli)?;
    let graph = extract_all(&html)?;
    let vocab_result = validation::validate(&graph);

    // Pair the registry with the profile id it is asked to evaluate.
    let selection = match cli.profile {
        ProfileChoice::Google => Some((ProfileRegistry::with_google(), "google")),
        ProfileChoice::Baseline => Some((ProfileRegistry::with_baseline(), "baseline")),
        ProfileChoice::None => None,
    };
    let profile_result = selection
        .map(|(registry, id)| registry.evaluate(id, &graph, &vocab_result.diagnostics))
        .transpose()?;

    // `--quiet` suppresses the report; the return value is unaffected.
    if !cli.quiet {
        let profile = profile_result.as_ref();
        match cli.format {
            OutputFormat::Text => format_text(&vocab_result, profile, &source_name, cli),
            OutputFormat::Json => format_json(&vocab_result, profile, &source_name, &graph)?,
            OutputFormat::Sarif => format_sarif(&vocab_result, profile, &source_name)?,
        }
    }

    let profile_failed = matches!(
        &profile_result,
        Some(pr) if pr.eligibility == Eligibility::NotEligible
    );
    Ok(vocab_result.has_errors() || profile_failed)
}
/// Loads the HTML to validate from whichever input option was given.
///
/// The options are checked in the order `--file`, `--url`, `--stdin`.
///
/// # Returns
///
/// `(html, source_name)`, where `source_name` is the displayed file path, the
/// URL, or the literal `<stdin>`.
///
/// # Errors
///
/// * `CliError::InvalidInput` — the file cannot be read, the response body
///   cannot be read, or no input option was given;
/// * `CliError::Http` — the HTTP request failed;
/// * `CliError::Io` — reading standard input failed.
fn read_input(cli: &Cli) -> Result<(String, String), CliError> {
    if let Some(path) = cli.file.as_deref() {
        return match std::fs::read_to_string(path) {
            Ok(html) => Ok((html, path.display().to_string())),
            Err(e) => Err(CliError::InvalidInput(format!(
                "cannot read file '{}': {e}",
                path.display()
            ))),
        };
    }

    if let Some(url) = cli.url.as_deref() {
        let user_agent = concat!("schemaorg-validate/", env!("CARGO_PKG_VERSION"));
        let response = ureq::get(url)
            .set("User-Agent", user_agent)
            .call()
            .map_err(|e| CliError::Http(Box::new(e)))?;
        let html = match response.into_string() {
            Ok(body) => body,
            Err(e) => {
                return Err(CliError::InvalidInput(format!(
                    "failed to read response body: {e}"
                )));
            }
        };
        return Ok((html, url.to_owned()));
    }

    if !cli.stdin {
        return Err(CliError::InvalidInput(
            "no input specified (use --file, --url, or --stdin)".into(),
        ));
    }

    let mut html = String::new();
    std::io::stdin().read_to_string(&mut html)?;
    Ok((html, "<stdin>".to_string()))
}
/// Palette of strings emitted around colored or emphasized text in the text
/// report. Each field is written before the styled text, and `reset` is
/// written after it.
struct Colors {
// Used for error diagnostics and "not eligible" / "required missing" output.
red: &'static str,
// Used for warning diagnostics and "recommended missing" output.
yellow: &'static str,
// Used for informational diagnostics and the "Restricted" eligibility.
blue: &'static str,
// Used for success messages and "eligible" output.
green: &'static str,
// Used for field labels such as "Source:".
bold: &'static str,
// Used for section separator lines.
dim: &'static str,
// Ends whatever styling was started by one of the fields above.
reset: &'static str,
}
/// Palette used when color output is enabled: ANSI escape sequences
/// (`ESC [ <code> m`) for each style.
const COLORS_ON: Colors = Colors {
red: "\x1b[31m",
yellow: "\x1b[33m",
blue: "\x1b[34m",
green: "\x1b[32m",
bold: "\x1b[1m",
dim: "\x1b[2m",
reset: "\x1b[0m",
};
/// Palette used when color output is disabled: every entry is the empty
/// string, so the same format strings produce plain text.
const COLORS_OFF: Colors = Colors {
red: "",
yellow: "",
blue: "",
green: "",
bold: "",
dim: "",
reset: "",
};
/// Decides whether the text report should contain ANSI color codes.
///
/// Color is disabled when `--no-color` was passed, or when the `NO_COLOR`
/// environment variable is present and not empty. Per the NO_COLOR
/// convention, an empty value does not count as a request to disable color.
///
/// # Returns
///
/// `true` when color codes should be emitted.
fn use_color(cli: &Cli) -> bool {
    if cli.no_color {
        return false;
    }
    // `var_os` instead of `var`: a value that is not valid Unicode still
    // means the variable is set, and `var` would report it as an error.
    std::env::var_os("NO_COLOR").map_or(true, |value| value.is_empty())
}
fn format_text(
vocab_result: &ValidationResult,
profile_result: Option<&ProfileResult>,
source_name: &str,
cli: &Cli,
) {
let c = if use_color(cli) { &COLORS_ON } else { &COLORS_OFF };
println!(
"\n{dim}-- schemaorg-validate {reset}{dim}{}{reset}",
"-".repeat(40),
dim = c.dim,
reset = c.reset,
);
println!(" {bold}Source:{reset} {source_name}", bold = c.bold, reset = c.reset);
if profile_result.is_some() {
let profile_name = match cli.profile {
ProfileChoice::Google => "google-rich-results",
ProfileChoice::Baseline => "baseline",
ProfileChoice::None => "none",
};
println!(" {bold}Profile:{reset} {profile_name}", bold = c.bold, reset = c.reset);
}
println!(
" {bold}Schema.org:{reset} v{}",
vocabulary::schema_version(),
bold = c.bold,
reset = c.reset,
);
let filtered: Vec<_> = vocab_result
.diagnostics
.iter()
.filter(|d| cli.severity.passes(d.severity))
.collect();
if filtered.is_empty() {
println!(
"\n {green}\u{2713} No vocabulary issues found{reset}",
green = c.green,
reset = c.reset,
);
} else {
println!(
"\n{dim}-- Vocabulary {reset}{dim}{}{reset}",
"-".repeat(47),
dim = c.dim,
reset = c.reset,
);
for diag in &filtered {
print_diagnostic(diag, c);
}
}
if let Some(pr) = profile_result {
println!(
"\n{dim}-- Profile Results {reset}{dim}{}{reset}",
"-".repeat(42),
dim = c.dim,
reset = c.reset,
);
for tr in &pr.type_results {
let status = if tr.eligible {
format!("{green}ELIGIBLE{reset}", green = c.green, reset = c.reset)
} else {
format!("{red}NOT ELIGIBLE{reset}", red = c.red, reset = c.reset)
};
println!(" {bold}{}{reset}: {status}", tr.schema_type, bold = c.bold, reset = c.reset);
if !tr.required_missing.is_empty() {
println!(
" {red}Required missing:{reset} {}",
tr.required_missing.join(", "),
red = c.red,
reset = c.reset,
);
}
if !tr.recommended_missing.is_empty() {
println!(
" {yellow}Recommended missing:{reset} {}",
tr.recommended_missing.join(", "),
yellow = c.yellow,
reset = c.reset,
);
}
}
let profile_diags: Vec<_> = pr
.diagnostics
.iter()
.filter(|d| cli.severity.passes(d.severity))
.collect();
for diag in &profile_diags {
print_diagnostic(diag, c);
}
let elig_str = match pr.eligibility {
Eligibility::Eligible => {
format!("{green}Eligible{reset}", green = c.green, reset = c.reset)
}
Eligibility::WarningsOnly => format!(
"{yellow}Eligible (with warnings){reset}",
yellow = c.yellow,
reset = c.reset,
),
Eligibility::NotEligible => {
format!("{red}Not Eligible{reset}", red = c.red, reset = c.reset)
}
Eligibility::Restricted => format!(
"{blue}Restricted{reset}",
blue = c.blue,
reset = c.reset,
),
};
println!("\n {bold}Eligibility:{reset} {elig_str}", bold = c.bold, reset = c.reset);
}
let error_count = vocab_result.errors().count()
+ profile_result.map_or(0, |pr| {
pr.diagnostics.iter().filter(|d| d.severity == Severity::Error).count()
});
let warning_count = vocab_result.warnings().count()
+ profile_result.map_or(0, |pr| {
pr.diagnostics
.iter()
.filter(|d| d.severity == Severity::Warning)
.count()
});
println!(
"\n {error_count} error(s), {warning_count} warning(s)\n",
);
}
fn print_diagnostic(diag: &ValidationDiagnostic, c: &Colors) {
let (icon, color) = match diag.severity {
Severity::Error => ("\u{2717}", c.red),
Severity::Warning => ("\u{26a0}", c.yellow),
Severity::Info => ("\u{2139}", c.blue),
};
let severity_label = match diag.severity {
Severity::Error => "ERROR",
Severity::Warning => "WARN ",
Severity::Info => "INFO ",
};
let loc = diag
.source_location
.as_ref()
.map(|l| format!(" (line {})", l.line))
.unwrap_or_default();
println!(
" {color}{icon} {severity_label}{reset} {path}{loc} -- {msg}",
color = color,
reset = c.reset,
path = diag.path,
msg = diag.message,
);
}
/// Prints the report as a pretty-printed JSON document on standard output.
///
/// The document contains the source name, the Schema.org version, extraction
/// statistics (node count, distinct source formats, extraction warning
/// count), the vocabulary diagnostics with error/warning counts, and — when
/// a profile was evaluated — a `profile` object with eligibility, per-type
/// results and diagnostics.
///
/// # Errors
///
/// Returns `CliError::InvalidInput` when JSON serialization fails.
fn format_json(
    vocab_result: &ValidationResult,
    profile_result: Option<&ProfileResult>,
    source_name: &str,
    graph: &schemaorg_rs::StructuredDataGraph,
) -> Result<(), CliError> {
    // Deduplicate through a HashSet, then sort: a HashSet iterates in an
    // arbitrary order, so without the sort the `formats` array could differ
    // between runs on identical input.
    let mut formats: Vec<String> = graph
        .nodes
        .iter()
        .map(|n| format!("{:?}", n.source_format))
        .collect::<HashSet<_>>()
        .into_iter()
        .collect();
    formats.sort_unstable();

    let mut output = serde_json::json!({
        "source": source_name,
        "schema_version": vocabulary::schema_version(),
        "extraction": {
            "node_count": graph.nodes.len(),
            "formats": formats,
            "warning_count": graph.warnings.len()
        },
        "vocabulary": {
            "diagnostics": vocab_result.diagnostics,
            "error_count": vocab_result.errors().count(),
            "warning_count": vocab_result.warnings().count()
        }
    });

    // The `profile` key is present only when a profile was evaluated.
    if let Some(pr) = profile_result {
        output["profile"] = serde_json::json!({
            "eligibility": pr.eligibility.to_string(),
            "type_results": pr.type_results,
            "diagnostics": pr.diagnostics
        });
    }

    println!(
        "{}",
        serde_json::to_string_pretty(&output)
            .map_err(|e| CliError::InvalidInput(format!("JSON serialization failed: {e}")))?
    );
    Ok(())
}
/// Prints the report as a pretty-printed SARIF document on standard output.
///
/// The document itself is built by `schemaorg_rs::sarif::build_sarif`; this
/// function only serializes and prints it.
///
/// # Errors
///
/// Returns `CliError::InvalidInput` when serialization fails.
fn format_sarif(
    vocab_result: &ValidationResult,
    profile_result: Option<&ProfileResult>,
    source_name: &str,
) -> Result<(), CliError> {
    let report = schemaorg_rs::sarif::build_sarif(vocab_result, profile_result, source_name);
    match serde_json::to_string_pretty(&report) {
        Ok(rendered) => {
            println!("{rendered}");
            Ok(())
        }
        Err(e) => Err(CliError::InvalidInput(format!(
            "SARIF serialization failed: {e}"
        ))),
    }
}