mod diagnostic;
use clap::Parser;
use colored::Colorize;
use rtemis_a3::{A3, A3_SCHEMA_URI, A3_VERSION, A3Error, validate};
use serde_json::{Value, json};
use std::io::{self, IsTerminal, Read};
use std::process;
// Command-line arguments for the `a3` binary, parsed with clap's derive API.
// NOTE: plain `//` comments are used on purpose. clap's derive turns `///`
// doc comments into help text, which would change the `--help` output.
#[derive(Parser)]
#[command(
name = "a3",
version,
about = "Validate and inspect A3 amino acid annotation files"
)]
struct Cli {
// Positional input path; the literal "-" makes `read_input` read stdin instead.
file: String,
// Maximum number of sequence characters included in the preview (default 20).
#[arg(short, long, default_value_t = 20)]
limit: usize,
// Suppresses the report and the error messages printed by `main`; the exit
// code is still set.
#[arg(short, long)]
quiet: bool,
// Emit pretty-printed JSON instead of the coloured human-readable report.
#[arg(short, long)]
json: bool,
// Route the input through `diagnostic::a3_diagnose` instead of the
// deserialize-then-`validate` path. Short flag is upper-case `-D`.
#[arg(short = 'D', long)]
diagnose: bool,
}
/// Greedily wraps `text` into lines of at most `width` characters.
///
/// Width is measured in `char`s, not bytes. Words are taken from
/// `split_whitespace`, so runs of whitespace collapse to a single space when
/// wrapping happens. A word longer than `width` is never split; it simply
/// occupies a line of its own.
///
/// The input is returned untouched as a single line when `width` is zero,
/// when it already fits, or when it contains no words at all. The returned
/// vector is therefore never empty.
fn wrap_words(text: &str, width: usize) -> Vec<String> {
    let already_fits = text.chars().count() <= width;
    if width == 0 || already_fits {
        return vec![text.to_string()];
    }

    let mut wrapped: Vec<String> = Vec::new();
    let mut line = String::new();
    let mut line_cols = 0usize;

    for token in text.split_whitespace() {
        let token_cols = token.chars().count();
        if line.is_empty() {
            // Very first word: nothing to separate it from.
            line_cols = 0;
        } else if line_cols + 1 + token_cols > width {
            // Word does not fit: flush the finished line and start a new one.
            wrapped.push(std::mem::take(&mut line));
            line_cols = 0;
        } else {
            line.push(' ');
            line_cols += 1;
        }
        line.push_str(token);
        line_cols += token_cols;
    }

    if !line.is_empty() {
        wrapped.push(line);
    }

    if wrapped.is_empty() {
        // Over-long input made only of whitespace: hand it back unchanged.
        return vec![text.to_string()];
    }
    wrapped
}
/// Builds a comma-separated preview of at most the first three `names` that
/// fits into `available` character columns.
///
/// An ellipsis (`…`) is appended when names were left out, either because
/// there are more than three or because the next one would not fit. One
/// column is kept in reserve for that ellipsis whenever the name being added
/// is not the final one.
///
/// Returns an empty string when `names` is empty or fewer than two columns
/// are available.
fn build_hint(names: &[String], available: usize) -> String {
    if available < 2 || names.is_empty() {
        return String::new();
    }

    let list_is_truncated = names.len() > 3;
    let mut hint = String::new();
    // Columns consumed so far, counted in chars.
    let mut used = 0usize;

    for (idx, name) in names.iter().take(3).enumerate() {
        let separator_cols = if idx == 0 { 0 } else { 2 };
        let piece_cols = separator_cols + name.chars().count();
        let is_final_piece = !list_is_truncated && idx + 1 == names.len();
        let ellipsis_reserve = usize::from(!is_final_piece);

        if used + piece_cols + ellipsis_reserve > available {
            // Out of room: mark the cut if a column is still free, then stop.
            if used < available {
                hint.push('…');
            }
            return hint;
        }

        if idx > 0 {
            hint.push_str(", ");
        }
        hint.push_str(name);
        used += piece_cols;
    }

    if list_is_truncated && used < available {
        hint.push('…');
    }
    hint
}
/// Prints the coloured, human-readable report for `a3` to stdout.
///
/// The report consists of:
/// 1. a validity banner, or, when `errors` is non-empty, an "invalid" banner
///    followed by a tree of the error messages;
/// 2. the sequence, cut to `limit` characters with an ellipsis when longer,
///    together with its total length in characters;
/// 3. per-category annotation counts, each with a short hint of entry names;
/// 4. the four metadata fields, word-wrapped onto continuation lines.
fn print_human(a3: &A3, errors: &[String], limit: usize) {
    // Column budget used to size the annotation hints and wrapped metadata.
    const LINE_WIDTH: usize = 90;

    println!();
    if errors.is_empty() {
        let version_tag = format!("A3 {}", a3.a3_version())
            .bold()
            .truecolor(71, 156, 255);
        println!(
            " {} {} {}",
            "✓ valid".green().bold(),
            version_tag,
            a3.schema().dimmed(),
        );
    } else {
        println!(" {}", "✗ invalid".red().bold());
        println!();
        // Safe: this branch is only reached with at least one error.
        let final_idx = errors.len() - 1;
        for (idx, message) in errors.iter().enumerate() {
            let branch = if idx == final_idx { "└──" } else { "├──" };
            println!(" {} {}", branch.dimmed(), message.red());
        }
    }
    println!();

    // --- Sequence -------------------------------------------------------
    let sequence = a3.sequence();
    let total_chars = sequence.chars().count();
    let seq_display = if total_chars <= limit {
        format!("{} (length = {})", sequence, total_chars)
    } else {
        let head: String = sequence.chars().take(limit).collect();
        format!("{}… (length = {})", head, total_chars)
    };
    println!(
        " {} {}",
        "Sequence".bold(),
        seq_display.truecolor(220, 150, 86)
    );
    println!();

    // --- Annotations ----------------------------------------------------
    println!(" {}", "Annotations".bold());
    let annotations = a3.annotations();

    // Keyed categories are listed alphabetically; variants are labelled by
    // position and kept in their stored order.
    let mut site_names: Vec<String> = annotations.site().keys().cloned().collect();
    site_names.sort();
    let mut region_names: Vec<String> = annotations.region().keys().cloned().collect();
    region_names.sort();
    let mut ptm_names: Vec<String> = annotations.ptm().keys().cloned().collect();
    ptm_names.sort();
    let mut proc_names: Vec<String> = annotations.processing().keys().cloned().collect();
    proc_names.sort();
    let var_names: Vec<String> = annotations
        .variant()
        .iter()
        .map(|variant| format!("pos {}", variant.position()))
        .collect();

    let rows = [
        ("site", annotations.site().len(), site_names),
        ("region", annotations.region().len(), region_names),
        ("ptm", annotations.ptm().len(), ptm_names),
        ("processing", annotations.processing().len(), proc_names),
        ("variant", annotations.variant().len(), var_names),
    ];
    let final_row = rows.len() - 1;
    for (idx, (kind, count, names)) in rows.iter().enumerate() {
        let branch = if idx == final_row { "└──" } else { "├──" };

        // Rendered count cell plus the number of columns it occupies; an
        // empty category shows a one-column dash.
        let (count_cell, count_cols) = if *count == 0 {
            ("—".dimmed().to_string(), 1)
        } else {
            let digits = count.to_string();
            let cols = digits.len();
            (digits.truecolor(220, 150, 86).to_string(), cols)
        };

        let prefix_cols = 21 + count_cols;
        let available = LINE_WIDTH.saturating_sub(prefix_cols + 1);
        let hint_body = build_hint(names, available);
        let hint = if hint_body.is_empty() {
            String::new()
        } else {
            format!(" {}", format!("({})", hint_body).dimmed())
        };

        println!(
            " {} {:<12}{}{}",
            branch.dimmed(),
            kind,
            count_cell,
            hint
        );
    }
    println!();

    // --- Metadata -------------------------------------------------------
    println!(" {}", "Metadata".bold());
    let metadata = a3.metadata();
    let fields: [(&str, &str); 4] = [
        ("UniProt ID", metadata.uniprot_id()),
        ("Description", metadata.description()),
        ("Reference", metadata.reference()),
        ("Organism", metadata.organism()),
    ];
    let label_width = fields
        .iter()
        .map(|(label, _)| label.len())
        .max()
        .unwrap_or(0);
    let value_col = 8 + label_width;
    let value_width = LINE_WIDTH.saturating_sub(value_col);
    let final_field = fields.len() - 1;

    for (idx, (label, value)) in fields.iter().enumerate() {
        let closing = idx == final_field;
        let branch = if closing { "└──" } else { "├──" };

        if value.is_empty() {
            println!(
                " {} {:<label_width$} {}",
                branch.dimmed(),
                label,
                "—".dimmed(),
                label_width = label_width,
            );
            continue;
        }

        // Prefix for wrapped lines: keeps the tree's vertical bar going for
        // every row except the last one.
        let continuation = if closing {
            " ".repeat(value_col)
        } else {
            format!(" {}{}", "│".dimmed(), " ".repeat(value_col - 3))
        };

        let wrapped = wrap_words(value, value_width);
        print!(
            " {} {:<label_width$} {}",
            branch.dimmed(),
            label,
            wrapped[0].truecolor(220, 150, 86),
            label_width = label_width,
        );
        for extra in wrapped.iter().skip(1) {
            print!("\n{}{}", continuation, extra.truecolor(220, 150, 86));
        }
        println!();
    }
    println!();
}
/// Builds the machine-readable report for `a3` as a JSON value.
///
/// The object carries the validity flag (`errors` is empty), the error
/// messages, the four metadata strings, the sequence length in characters, a
/// preview holding the first `limit` characters of the sequence, and the
/// number of entries in each annotation category.
fn build_json(a3: &A3, errors: &[String], limit: usize) -> Value {
    let sequence = a3.sequence();
    let sequence_length = sequence.chars().count();
    // Taken per `char`, so the cut never lands inside a multi-byte character.
    let sequence_preview: String = sequence.chars().take(limit).collect();

    let metadata = a3.metadata();
    let metadata_json = json!({
        "uniprot_id": metadata.uniprot_id(),
        "description": metadata.description(),
        "reference": metadata.reference(),
        "organism": metadata.organism(),
    });

    let annotations = a3.annotations();
    let annotation_counts = json!({
        "site": annotations.site().len(),
        "region": annotations.region().len(),
        "ptm": annotations.ptm().len(),
        "processing": annotations.processing().len(),
        "variant": annotations.variant().len(),
    });

    json!({
        "valid": errors.is_empty(),
        "errors": errors,
        "metadata": metadata_json,
        "sequence_length": sequence_length,
        "sequence_preview": sequence_preview,
        "annotations": annotation_counts
    })
}
/// Reads the whole input into a string.
///
/// `file` is a filesystem path, except for the literal `"-"`, which selects
/// standard input. Failures are returned as ready-to-print messages.
fn read_input(file: &str) -> Result<String, String> {
    if file != "-" {
        return std::fs::read_to_string(file)
            .map_err(|e| format!("Error reading '{file}': {e}"));
    }

    let mut text = String::new();
    match io::stdin().read_to_string(&mut text) {
        Ok(_) => Ok(text),
        Err(e) => Err(format!("Error reading stdin: {e}")),
    }
}
fn main() {
let cli = Cli::parse();
if !std::io::stdout().is_terminal() {
colored::control::set_override(false);
}
let content = read_input(&cli.file).unwrap_or_else(|e| {
if !cli.quiet {
eprintln!("{e}");
}
process::exit(2);
});
if cli.diagnose {
match diagnostic::a3_diagnose(&content) {
Ok(a3) => {
if !cli.quiet {
if cli.json {
println!(
"{}",
serde_json::to_string_pretty(&build_json(&a3, &[], cli.limit)).unwrap()
);
} else {
print_human(&a3, &[], cli.limit);
}
}
process::exit(0);
}
Err(err) => {
let (errors, exit_code) = match &err {
diagnostic::DiagnoseError::Fatal(e) => (e.as_slice(), 2i32),
diagnostic::DiagnoseError::Invalid(e) => (e.as_slice(), 1i32),
};
if !cli.quiet {
if cli.json {
println!(
"{}",
serde_json::to_string_pretty(&json!({
"valid": false,
"errors": errors,
}))
.unwrap()
);
} else {
println!("\n {}", "✗ invalid".red().bold());
println!();
let last = errors.len() - 1;
for (i, msg) in errors.iter().enumerate() {
let connector = if i == last { "└──" } else { "├──" };
println!(" {} {}", connector.dimmed(), msg.red());
}
println!();
}
}
process::exit(exit_code);
}
}
}
let raw: A3 = match serde_json::from_str(&content) {
Ok(r) => r,
Err(e) => {
if !cli.quiet {
let mut errors = vec![format!("Invalid A3: {e}")];
if let Ok(value) = serde_json::from_str::<serde_json::Value>(&content) {
match value.get("$schema").and_then(|v| v.as_str()) {
Some(s) if s != A3_SCHEMA_URI => {
errors.push(format!("'$schema' must be '{A3_SCHEMA_URI}', got '{s}'"));
}
None => {
errors.push(format!(
"'$schema' is required and must be '{A3_SCHEMA_URI}'"
));
}
_ => {}
}
match value.get("a3_version").and_then(|v| v.as_str()) {
Some(v) if v != A3_VERSION => {
errors.push(format!("'a3_version' must be '{A3_VERSION}', got '{v}'"));
}
None => {
errors.push(format!(
"'a3_version' is required and must be '{A3_VERSION}'"
));
}
_ => {}
}
}
if cli.json {
println!(
"{}",
serde_json::to_string_pretty(&json!({
"valid": false,
"errors": errors,
}))
.unwrap()
);
} else {
println!("\n {}", "✗ invalid".red().bold());
println!();
let last = errors.len() - 1;
for (i, msg) in errors.iter().enumerate() {
let connector = if i == last { "└──" } else { "├──" };
println!(" {} {}", connector.dimmed(), msg.red());
}
}
}
process::exit(2);
}
};
let raw_snapshot = raw.clone();
match validate(raw) {
Ok(a3) => {
if !cli.quiet {
if cli.json {
println!(
"{}",
serde_json::to_string_pretty(&build_json(&a3, &[], cli.limit)).unwrap()
);
} else {
print_human(&a3, &[], cli.limit);
}
}
process::exit(0);
}
Err(A3Error::Validate(errors)) => {
if !cli.quiet {
if cli.json {
println!(
"{}",
serde_json::to_string_pretty(&build_json(
&raw_snapshot,
&errors,
cli.limit,
))
.unwrap()
);
} else {
print_human(&raw_snapshot, &errors, cli.limit);
}
}
process::exit(1);
}
Err(e) => {
if !cli.quiet {
eprintln!("Unexpected error: {e}");
}
process::exit(2);
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use rtemis_a3::a3_from_json;

    // Minimal A3 document with empty annotations and metadata.
    // NOTE(review): despite the name, the sequence below is plain ASCII, so
    // this fixture does not exercise multi-byte characters — confirm whether
    // the validator would accept a non-ASCII sequence before changing it.
    const MULTIBYTE_JSON: &str = r#"{
"$schema": "https://schema.rtemis.org/a3/v1/schema.json",
"a3_version": "1.0.0",
"sequence": "MAEPRQ",
"annotations": {"site":{},"region":{},"ptm":{},"processing":{},"variant":[]},
"metadata": {"uniprot_id":"","description":"","reference":"","organism":""}
}"#;

    // `build_json` must cut the preview by characters without panicking and
    // report the full sequence length.
    #[test]
    fn sequence_preview_does_not_panic_on_multibyte() {
        let document = a3_from_json(MULTIBYTE_JSON).unwrap();

        let outcome = std::panic::catch_unwind(|| {
            build_json(&document, &[], 3);
        });
        assert!(outcome.is_ok(), "build_json panicked on sequence preview");

        let report = build_json(&document, &[], 3);
        assert_eq!(report["sequence_preview"], "MAE");
        assert_eq!(report["sequence_length"], 6);
    }
}