use serde::{Deserialize, Serialize};
use std::collections::HashMap;
pub use crate::benchmark::types::{ParsedVariantDetails, PositionDetails};
/// Identifier for one of the four tools known to this module.
///
/// Every variant carries an explicit `serde` name; those names are the same
/// strings that `ToolName::as_str` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ToolName {
/// (De)serialized as `"ferro"`.
#[serde(rename = "ferro")]
Ferro,
/// (De)serialized as `"mutalyzer"`.
#[serde(rename = "mutalyzer")]
Mutalyzer,
/// (De)serialized as `"biocommons"`.
#[serde(rename = "biocommons")]
Biocommons,
/// (De)serialized as `"hgvs-rs"`.
#[serde(rename = "hgvs-rs")]
HgvsRs,
}
impl ToolName {
    /// Returns the canonical lowercase identifier of this tool.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Ferro => "ferro",
            Self::Mutalyzer => "mutalyzer",
            Self::Biocommons => "biocommons",
            Self::HgvsRs => "hgvs-rs",
        }
    }

    /// Looks a tool up by name, ignoring letter case.
    ///
    /// Every canonical identifier is accepted, plus the alias `"hgvsrs"`.
    /// The input is not trimmed; anything else yields `None`.
    pub fn parse(s: &str) -> Option<Self> {
        let wanted = s.to_lowercase();
        if wanted == "hgvsrs" {
            // Hyphen-less spelling of "hgvs-rs".
            return Some(Self::HgvsRs);
        }
        Self::all()
            .iter()
            .copied()
            .find(|tool| tool.as_str() == wanted)
    }

    /// Returns every known tool, in declaration order.
    pub fn all() -> &'static [ToolName] {
        const EVERY_TOOL: &[ToolName] = &[
            ToolName::Ferro,
            ToolName::Mutalyzer,
            ToolName::Biocommons,
            ToolName::HgvsRs,
        ];
        EVERY_TOOL
    }
}
/// Formats the tool as its canonical identifier (see `ToolName::as_str`).
impl std::fmt::Display for ToolName {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` honours width/fill/alignment requested by the caller (e.g. `{:<12}`),
        // which a nested `write!(f, "{}", ..)` would silently discard.
        f.pad(self.as_str())
    }
}
/// Structured classification of a tool error message.
///
/// Four variants carry a finer-grained kind; `Timeout` and `Internal` stand alone.
/// `Internal` is also what the analyzers in `error_analysis` return when no rule matches.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum StructuredErrorCategory {
/// Parse-related error, refined by a `ParseErrorKind`.
Parse(ParseErrorKind),
/// Reference-data error, refined by a `ReferenceErrorKind`.
Reference(ReferenceErrorKind),
/// Validation error, refined by a `ValidationErrorKind`.
Validation(ValidationErrorKind),
/// Tool-level error, refined by a `ToolErrorKind`.
Tool(ToolErrorKind),
/// The message was recognised as a timeout.
Timeout,
/// Internal error; also the fallback category.
Internal,
}
/// Refinement of `StructuredErrorCategory::Parse`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ParseErrorKind {
/// Default parse kind: ferro code 1002 and other 1xxx codes, and keyword matches in the other analyzers.
InvalidSyntax,
/// Produced for ferro code 1001.
InvalidAccession,
/// Produced for ferro code 1003.
InvalidPosition,
/// Produced for ferro code 1004.
InvalidEdit,
/// Not produced by any analyzer in this file.
UnknownVariantType,
}
/// Refinement of `StructuredErrorCategory::Reference`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ReferenceErrorKind {
/// Default reference kind: ferro code 2001 and other 2xxx codes, plus keyword matches for Mutalyzer and Biocommons.
SequenceNotFound,
/// Produced for ferro code 2002 and by the Mutalyzer and hgvs-rs keyword rules.
TranscriptNotFound,
/// Produced for ferro code 2003 and by the "mismatch" keyword rules.
SequenceMismatch,
/// Not produced by any analyzer in this file.
ChromosomeNotFound,
/// Produced by the hgvs-rs analyzer for "connection"/"database" messages.
DatabaseError,
}
/// Refinement of `StructuredErrorCategory::Validation`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ValidationErrorKind {
/// Default validation kind: ferro code 3001 and other 3xxx codes, plus keyword matches for Mutalyzer and Biocommons.
PositionOutOfBounds,
/// Produced for ferro code 3002.
InvalidRange,
/// Produced for ferro code 3003 and by the Biocommons and hgvs-rs keyword rules.
UnsupportedVariant,
/// Produced by the hgvs-rs analyzer for messages mentioning "intronic".
IntronicNotSupported,
/// Produced by the hgvs-rs analyzer for messages mentioning "protein".
ProteinNotSupported,
}
/// Refinement of `StructuredErrorCategory::Tool`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ToolErrorKind {
/// Produced by the Mutalyzer analyzer and by the generic fallback rules.
Unavailable,
/// Produced by the generic fallback rules for messages mentioning "config".
ConfigurationError,
/// Produced by the Biocommons analyzer for "subprocess"/"python" messages.
ExecutionFailed,
/// Not produced by any analyzer in this file.
IncompatibleVersion,
}
impl std::fmt::Display for StructuredErrorCategory {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
StructuredErrorCategory::Parse(kind) => write!(f, "parse_error_{:?}", kind),
StructuredErrorCategory::Reference(kind) => write!(f, "reference_error_{:?}", kind),
StructuredErrorCategory::Validation(kind) => write!(f, "validation_error_{:?}", kind),
StructuredErrorCategory::Tool(kind) => write!(f, "tool_error_{:?}", kind),
StructuredErrorCategory::Timeout => write!(f, "timeout"),
StructuredErrorCategory::Internal => write!(f, "internal_error"),
}
}
}
/// Error-handling mode carried by the request types.
///
/// (De)serialized in lowercase (`"silent"`, `"lenient"`, `"strict"`); `Lenient` is the
/// `Default`. What each mode changes at runtime is decided by code outside this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ErrorMode {
/// Serialized as `"silent"`.
Silent,
/// Serialized as `"lenient"`; the default mode.
#[default]
Lenient,
/// Serialized as `"strict"`.
Strict,
}
impl ErrorMode {
pub fn as_str(&self) -> &'static str {
match self {
ErrorMode::Silent => "silent",
ErrorMode::Lenient => "lenient",
ErrorMode::Strict => "strict",
}
}
}
/// Formats the mode as its lowercase name (see `ErrorMode::as_str`).
impl std::fmt::Display for ErrorMode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` keeps caller-supplied width/fill/alignment working; a nested
        // `write!(f, "{}", ..)` would ignore them.
        f.pad(self.as_str())
    }
}
/// Deserializable request carrying a single HGVS string.
#[derive(Debug, Clone, Deserialize)]
pub struct SingleRequest {
/// The HGVS expression to process.
pub hgvs: String,
/// Optional explicit tool selection; how `None` is interpreted is decided by the consumer (not shown here).
pub tools: Option<Vec<ToolName>>,
/// Optional timeout, in seconds per the field name.
pub timeout_seconds: Option<u32>,
/// Error-handling mode; `ErrorMode::default()` (`Lenient`) when absent from the input.
#[serde(default)]
pub error_mode: ErrorMode,
}
/// Deserializable request carrying several variant strings at once.
#[derive(Debug, Clone, Deserialize)]
pub struct BatchRequest {
/// Variant strings to process — presumably HGVS expressions, by analogy with `SingleRequest::hgvs`.
pub variants: Vec<String>,
/// Optional explicit tool selection; how `None` is interpreted is decided by the consumer (not shown here).
pub tools: Option<Vec<ToolName>>,
/// Optional timeout, in seconds per the field name.
pub timeout_seconds: Option<u32>,
/// Error-handling mode; `ErrorMode::default()` (`Lenient`) when absent from the input.
#[serde(default)]
pub error_mode: ErrorMode,
}
/// Serializable response for one input: per-tool results plus an agreement summary.
#[derive(Debug, Serialize)]
pub struct SingleResponse {
/// The input string this response refers to.
pub input: String,
/// Results reported by the individual tools.
pub results: Vec<ToolResult>,
/// Summary of how the tool results compare with each other.
pub agreement: AgreementSummary,
/// Processing time in milliseconds (per the field name).
pub processing_time_ms: u64,
}
/// Serializable response for a batch of variants.
#[derive(Debug, Serialize)]
pub struct BatchResponse {
/// Number of variants in the batch.
pub total_variants: usize,
/// Number of variants counted as successful; the exact criterion is set by the producer (not shown here).
pub successful_variants: usize,
/// One entry per variant.
pub results: Vec<VariantBatchResult>,
/// Total processing time in milliseconds (per the field name).
pub total_processing_time_ms: u64,
}
/// Result for one variant inside a `BatchResponse`.
#[derive(Debug, Serialize)]
pub struct VariantBatchResult {
/// The input string this entry refers to.
pub input: String,
/// Results reported by the individual tools.
pub results: Vec<ToolResult>,
/// Summary of how the tool results compare with each other.
pub agreement: AgreementSummary,
}
/// Outcome reported by a single tool for a single input.
#[derive(Debug, Serialize)]
pub struct ToolResult {
/// The tool that produced this result.
pub tool: ToolName,
/// Whether the tool run is considered successful.
pub success: bool,
/// Output text of the tool, if any.
pub output: Option<String>,
/// Error text of the tool, if any.
pub error: Option<String>,
/// Error category as a string — presumably one produced by `categorize_error`; confirm at the call site.
pub error_category: Option<String>,
/// Elapsed time in milliseconds (per the field name).
pub elapsed_ms: u64,
/// Parsed details of the variant; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<ParsedVariantDetails>,
}
/// Serializable response of a validation request.
#[derive(Debug, Serialize)]
pub struct ValidateResponse {
/// The input string that was validated.
pub input: String,
/// Whether the input is considered valid.
pub valid: bool,
/// Validation error messages; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub errors: Option<Vec<String>>,
/// Parsed components of the variant; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub components: Option<ParsedVariantDetails>,
/// Processing time in milliseconds (per the field name).
pub processing_time_ms: u64,
}
/// Summary of how the results of several tools compare for one input.
#[derive(Debug, Serialize)]
pub struct AgreementSummary {
/// Whether all tools are considered to agree; the criterion is set by the producer (not shown here).
pub all_agree: bool,
/// Number of tools counted as successful.
pub successful_tools: usize,
/// Number of tools counted as failed.
pub failed_tools: usize,
/// Tools grouped under a string key — presumably the output text each group produced;
/// confirm against the code that fills this map.
pub outputs: HashMap<String, Vec<ToolName>>,
}
/// Health status of a single tool.
#[derive(Debug, Clone, Serialize)]
pub struct ToolStatus {
/// The tool this status describes.
pub tool: ToolName,
/// Whether the tool is considered available.
pub available: bool,
/// Free-form status string.
pub status: String,
/// Time of the last check as a string; the format is chosen by the producer (not shown here).
pub last_check: String,
/// Optional mode string; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub mode: Option<String>,
}
/// Serializable overall health report.
#[derive(Debug, Clone, Serialize)]
pub struct HealthResponse {
/// Overall status string.
pub status: String,
/// Tools reported as available.
pub available_tools: Vec<ToolName>,
/// Tools reported as unavailable.
pub unavailable_tools: Vec<ToolName>,
/// Per-tool status entries.
pub tools: Vec<ToolStatus>,
}
/// Health report extended with per-tool test results.
#[derive(Debug, Clone, Serialize)]
pub struct DetailedHealthResponse {
/// The basic report; `flatten` serializes its fields at the same level as `test_results`.
#[serde(flatten)]
pub basic: HealthResponse,
/// Test results, one entry per tool.
pub test_results: Vec<ToolTestResults>,
}
/// Aggregated test results for one tool.
#[derive(Debug, Clone, Serialize)]
pub struct ToolTestResults {
/// The tool these results belong to.
pub tool: ToolName,
/// Number of passed tests.
pub passed: usize,
/// A test total. NOTE(review): both `total` and `total_tests` exist; how they differ is not visible here.
pub total: usize,
/// A test total; see the note on `total`.
pub total_tests: usize,
/// Optional mode string; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub mode: Option<String>,
/// Test results grouped by category.
pub categories: Vec<TestCategory>,
}
/// A named group of test results.
#[derive(Debug, Clone, Serialize)]
pub struct TestCategory {
/// Name of the category.
pub name: String,
/// Results of the tests in this category.
pub tests: Vec<TestResult>,
}
/// Status of a single test; serialized in lowercase (`"pass"`, `"fail"`, `"na"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum TestStatus {
/// Serialized as `"pass"`.
Pass,
/// Serialized as `"fail"`.
Fail,
/// Serialized as `"na"` — presumably "not applicable".
Na,
}
/// Result of a single test.
#[derive(Debug, Clone, Serialize)]
pub struct TestResult {
/// Name of the test.
pub name: String,
/// The variant string the test used.
pub variant: String,
/// Three-valued status of the test.
pub status: TestStatus,
/// Boolean pass flag. NOTE(review): kept alongside `status`; how the two relate is up to the producer.
pub passed: bool,
/// Error text, if any; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
}
/// Serializable error payload; built by `ServiceError::to_response`.
#[derive(Debug, Serialize)]
pub struct ErrorResponse {
/// Short machine-readable code such as `"bad_request"` or `"timeout"`.
pub error: String,
/// Human-readable message.
pub message: String,
/// Optional structured details. There is no `skip_serializing_if` here, so the
/// field is always serialized, even when it is `None`.
pub details: Option<serde_json::Value>,
}
/// Errors raised by the service layer.
///
/// The `#[error]` attributes define the `Display` text; `ServiceError::status_code`
/// maps each variant to a numeric status code.
#[derive(Debug, thiserror::Error)]
pub enum ServiceError {
/// A tool is unavailable; status code 503.
#[error("Tool unavailable: {0}")]
ToolUnavailable(String),
/// The HGVS input is invalid; status code 400.
#[error("Invalid HGVS: {0}")]
InvalidHgvs(String),
/// The request timed out; status code 408.
#[error("Request timeout")]
Timeout,
/// Internal failure; status code 500.
#[error("Internal error: {0}")]
InternalError(String),
/// Configuration problem; status code 500.
#[error("Configuration error: {0}")]
ConfigError(String),
/// Malformed request; status code 400.
#[error("Bad request: {0}")]
BadRequest(String),
/// The circuit breaker is open; status code 503.
#[error("Circuit breaker open - service temporarily unavailable")]
CircuitBreakerOpen,
}
impl ServiceError {
    /// Returns the numeric status code associated with this error.
    pub fn status_code(&self) -> u16 {
        match self {
            Self::BadRequest(_) | Self::InvalidHgvs(_) => 400,
            Self::Timeout => 408,
            Self::ConfigError(_) | Self::InternalError(_) => 500,
            Self::ToolUnavailable(_) | Self::CircuitBreakerOpen => 503,
        }
    }

    /// Builds the serializable payload for this error.
    ///
    /// `error` is a short machine-readable code, `message` is the `Display`
    /// text of the error, and `details` is always `None`.
    pub fn to_response(&self) -> ErrorResponse {
        let code: &'static str = match self {
            Self::BadRequest(_) => "bad_request",
            Self::InvalidHgvs(_) => "invalid_hgvs",
            Self::Timeout => "timeout",
            Self::ConfigError(_) => "config_error",
            Self::InternalError(_) => "internal_error",
            Self::ToolUnavailable(_) => "tool_unavailable",
            Self::CircuitBreakerOpen => "circuit_breaker_open",
        };
        ErrorResponse {
            error: code.to_string(),
            message: self.to_string(),
            details: None,
        }
    }
}
pub mod error_analysis {
use super::*;
pub trait ErrorAnalyzer {
fn analyze_error(&self, error: &str) -> StructuredErrorCategory;
}
pub struct FerroErrorAnalyzer;
impl ErrorAnalyzer for FerroErrorAnalyzer {
fn analyze_error(&self, error: &str) -> StructuredErrorCategory {
if let Some(error_code) = extract_ferro_error_code(error) {
match error_code / 1000 {
1 => {
match error_code {
1001 => {
StructuredErrorCategory::Parse(ParseErrorKind::InvalidAccession)
}
1002 => StructuredErrorCategory::Parse(ParseErrorKind::InvalidSyntax),
1003 => StructuredErrorCategory::Parse(ParseErrorKind::InvalidPosition),
1004 => StructuredErrorCategory::Parse(ParseErrorKind::InvalidEdit),
_ => StructuredErrorCategory::Parse(ParseErrorKind::InvalidSyntax),
}
}
2 => {
match error_code {
2001 => StructuredErrorCategory::Reference(
ReferenceErrorKind::SequenceNotFound,
),
2002 => StructuredErrorCategory::Reference(
ReferenceErrorKind::TranscriptNotFound,
),
2003 => StructuredErrorCategory::Reference(
ReferenceErrorKind::SequenceMismatch,
),
_ => StructuredErrorCategory::Reference(
ReferenceErrorKind::SequenceNotFound,
),
}
}
3 => {
match error_code {
3001 => StructuredErrorCategory::Validation(
ValidationErrorKind::PositionOutOfBounds,
),
3002 => StructuredErrorCategory::Validation(
ValidationErrorKind::InvalidRange,
),
3003 => StructuredErrorCategory::Validation(
ValidationErrorKind::UnsupportedVariant,
),
_ => StructuredErrorCategory::Validation(
ValidationErrorKind::PositionOutOfBounds,
),
}
}
_ => StructuredErrorCategory::Internal,
}
} else {
analyze_generic_error(error)
}
}
}
pub struct MutalyzerErrorAnalyzer;
impl ErrorAnalyzer for MutalyzerErrorAnalyzer {
fn analyze_error(&self, error: &str) -> StructuredErrorCategory {
let lower_error = error.to_lowercase();
if lower_error.contains("parse") || lower_error.contains("syntax") {
StructuredErrorCategory::Parse(ParseErrorKind::InvalidSyntax)
} else if lower_error.contains("sequence") && lower_error.contains("not") {
StructuredErrorCategory::Reference(ReferenceErrorKind::SequenceNotFound)
} else if lower_error.contains("transcript") && lower_error.contains("not") {
StructuredErrorCategory::Reference(ReferenceErrorKind::TranscriptNotFound)
} else if lower_error.contains("esequencemismatch") || lower_error.contains("mismatch")
{
StructuredErrorCategory::Reference(ReferenceErrorKind::SequenceMismatch)
} else if lower_error.contains("position") && lower_error.contains("out") {
StructuredErrorCategory::Validation(ValidationErrorKind::PositionOutOfBounds)
} else if lower_error.contains("unavailable") || lower_error.contains("connection") {
StructuredErrorCategory::Tool(ToolErrorKind::Unavailable)
} else {
analyze_generic_error(error)
}
}
}
pub struct BiocommonsErrorAnalyzer;
impl ErrorAnalyzer for BiocommonsErrorAnalyzer {
fn analyze_error(&self, error: &str) -> StructuredErrorCategory {
let lower_error = error.to_lowercase();
if lower_error.contains("parse") || lower_error.contains("invalid hgvs") {
StructuredErrorCategory::Parse(ParseErrorKind::InvalidSyntax)
} else if lower_error.contains("retrieval") || lower_error.contains("not found") {
StructuredErrorCategory::Reference(ReferenceErrorKind::SequenceNotFound)
} else if lower_error.contains("range") || lower_error.contains("bounds") {
StructuredErrorCategory::Validation(ValidationErrorKind::PositionOutOfBounds)
} else if lower_error.contains("not_supported") || lower_error.contains("unsupported") {
StructuredErrorCategory::Validation(ValidationErrorKind::UnsupportedVariant)
} else if lower_error.contains("esequencemismatch") || lower_error.contains("mismatch")
{
StructuredErrorCategory::Reference(ReferenceErrorKind::SequenceMismatch)
} else if lower_error.contains("subprocess") || lower_error.contains("python") {
StructuredErrorCategory::Tool(ToolErrorKind::ExecutionFailed)
} else {
analyze_generic_error(error)
}
}
}
pub struct HgvsRsErrorAnalyzer;
impl ErrorAnalyzer for HgvsRsErrorAnalyzer {
fn analyze_error(&self, error: &str) -> StructuredErrorCategory {
let lower_error = error.to_lowercase();
if lower_error.contains("parse") || lower_error.contains("syntax") {
StructuredErrorCategory::Parse(ParseErrorKind::InvalidSyntax)
} else if lower_error.contains("validation") {
StructuredErrorCategory::Validation(ValidationErrorKind::UnsupportedVariant)
} else if lower_error.contains("connection") || lower_error.contains("database") {
StructuredErrorCategory::Reference(ReferenceErrorKind::DatabaseError)
} else if lower_error.contains("transcript") {
StructuredErrorCategory::Reference(ReferenceErrorKind::TranscriptNotFound)
} else if lower_error.contains("intronic") {
StructuredErrorCategory::Validation(ValidationErrorKind::IntronicNotSupported)
} else if lower_error.contains("protein") {
StructuredErrorCategory::Validation(ValidationErrorKind::ProteinNotSupported)
} else if lower_error.contains("panic") || lower_error.contains("thread") {
StructuredErrorCategory::Internal
} else {
analyze_generic_error(error)
}
}
}
fn extract_ferro_error_code(error: &str) -> Option<u32> {
for word in error.split_whitespace() {
if let Some(code_part) = word.strip_prefix('E') {
if let Ok(code) = code_part.parse::<u32>() {
if (1000..=9999).contains(&code) {
return Some(code);
}
}
}
}
None
}
fn analyze_generic_error(error: &str) -> StructuredErrorCategory {
let lower_error = error.to_lowercase();
if lower_error.contains("timeout") {
StructuredErrorCategory::Timeout
} else if lower_error.contains("unavailable") || lower_error.contains("unreachable") {
StructuredErrorCategory::Tool(ToolErrorKind::Unavailable)
} else if lower_error.contains("configuration") || lower_error.contains("config") {
StructuredErrorCategory::Tool(ToolErrorKind::ConfigurationError)
} else {
StructuredErrorCategory::Internal
}
}
pub fn get_error_analyzer(tool: ToolName) -> Box<dyn ErrorAnalyzer> {
match tool {
ToolName::Ferro => Box::new(FerroErrorAnalyzer),
ToolName::Mutalyzer => Box::new(MutalyzerErrorAnalyzer),
ToolName::Biocommons => Box::new(BiocommonsErrorAnalyzer),
ToolName::HgvsRs => Box::new(HgvsRsErrorAnalyzer),
}
}
}
pub fn analyze_error_structured(tool: ToolName, error: &str) -> StructuredErrorCategory {
let analyzer = error_analysis::get_error_analyzer(tool);
analyzer.analyze_error(error)
}
pub fn categorize_error(tool: ToolName, error: &str) -> String {
analyze_error_structured(tool, error).to_string()
}
/// Coordinate system selector; (de)serialized in lowercase (`"c"`, `"g"`, `"p"`, `"n"`).
///
/// The letters presumably mirror the HGVS prefixes `c.`, `g.`, `p.` and `n.` — confirm with the consumers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CoordinateSystem {
/// Serialized as `"c"`.
C,
/// Serialized as `"g"`.
G,
/// Serialized as `"p"`.
P,
/// Serialized as `"n"`.
N,
}
impl CoordinateSystem {
pub fn as_str(&self) -> &'static str {
match self {
CoordinateSystem::C => "c",
CoordinateSystem::G => "g",
CoordinateSystem::P => "p",
CoordinateSystem::N => "n",
}
}
}
/// Formats the coordinate system as its lowercase letter (see `CoordinateSystem::as_str`).
impl std::fmt::Display for CoordinateSystem {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` keeps caller-supplied width/fill/alignment working; a nested
        // `write!(f, "{}", ..)` would ignore them.
        f.pad(self.as_str())
    }
}
/// Deserializable request to convert an HGVS string into another coordinate system.
#[derive(Debug, Clone, Deserialize)]
pub struct ConvertRequest {
/// The HGVS expression to convert.
pub hgvs: String,
/// The coordinate system to convert into.
pub target_system: CoordinateSystem,
/// Flag that is `false` when absent from the input; presumably requests `all_conversions` in the response.
#[serde(default)]
pub include_all: bool,
}
/// Serializable response of a conversion request.
#[derive(Debug, Serialize)]
pub struct ConvertResponse {
/// The input string that was converted.
pub input: String,
/// Name of the source coordinate system, as a string.
pub source_system: String,
/// Name of the target coordinate system, as a string.
pub target_system: String,
/// The converted expression, if any.
pub converted: Option<String>,
/// Additional conversions; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub all_conversions: Option<Vec<ConversionResult>>,
/// Error text; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
/// Processing time in milliseconds (per the field name).
pub processing_time_ms: u64,
}
/// One converted representation, as listed in `ConvertResponse::all_conversions`.
#[derive(Debug, Serialize)]
pub struct ConversionResult {
/// Name of the coordinate system of this representation.
pub system: String,
/// The HGVS expression in that system.
pub hgvs: String,
/// Reference identifier as a string — presumably the accession the expression is relative to.
pub reference: String,
}
/// Deserializable request for the effect of a variant.
#[derive(Debug, Clone, Deserialize)]
pub struct EffectRequest {
/// The HGVS expression to evaluate.
pub hgvs: String,
/// Flag that is `false` when absent from the input; presumably requests `nmd_prediction` in the response.
#[serde(default)]
pub include_nmd: bool,
}
/// Serializable response of an effect request.
#[derive(Debug, Serialize)]
pub struct EffectResponse {
/// The input string that was evaluated.
pub input: String,
/// The sequence effect, if any; serialized even when `None` (no skip attribute).
pub effect: Option<SequenceEffect>,
/// Protein-level consequence; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub protein_consequence: Option<ProteinConsequence>,
/// NMD prediction; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub nmd_prediction: Option<NmdPrediction>,
/// Error text; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
/// Processing time in milliseconds (per the field name).
pub processing_time_ms: u64,
}
/// Description of a variant's effect on the sequence; all fields are plain strings.
#[derive(Debug, Serialize)]
pub struct SequenceEffect {
/// Term identifier — presumably a Sequence Ontology (SO) term; confirm with the producer.
pub so_term: String,
/// Name of the effect.
pub name: String,
/// Description of the effect.
pub description: String,
/// Impact level as a string; the set of values is chosen by the producer (not shown here).
pub impact: String,
}
/// Protein-level consequence of a variant.
#[derive(Debug, Serialize)]
pub struct ProteinConsequence {
/// Protein-level HGVS expression (per the field name).
pub hgvs_p: String,
/// Reference amino acid, as a string.
pub ref_aa: String,
/// Alternate amino acid, as a string.
pub alt_aa: String,
/// Position of the change; the numbering convention is not visible here.
pub position: u64,
/// Whether the change is a frameshift.
pub is_frameshift: bool,
}
/// NMD prediction for a variant ("NMD" presumably stands for nonsense-mediated decay).
#[derive(Debug, Serialize)]
pub struct NmdPrediction {
/// Whether NMD is predicted.
pub predicted: bool,
/// Confidence of the prediction; the scale is chosen by the producer (not shown here).
pub confidence: f64,
/// Free-form explanation of the prediction.
pub reason: String,
}
/// Genome build selector.
///
/// Serialized as `"GRCh37"` / `"GRCh38"`; on deserialization the aliases `"hg19"`
/// and `"hg38"` are accepted as well.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum GenomeBuild {
/// `"GRCh37"`, alias `"hg19"`.
#[serde(rename = "GRCh37", alias = "hg19")]
GRCh37,
/// `"GRCh38"`, alias `"hg38"`.
#[serde(rename = "GRCh38", alias = "hg38")]
GRCh38,
}
impl GenomeBuild {
pub fn as_str(&self) -> &'static str {
match self {
GenomeBuild::GRCh37 => "GRCh37",
GenomeBuild::GRCh38 => "GRCh38",
}
}
}
/// Formats the build as its canonical name (see `GenomeBuild::as_str`).
impl std::fmt::Display for GenomeBuild {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` keeps caller-supplied width/fill/alignment working; a nested
        // `write!(f, "{}", ..)` would ignore them.
        f.pad(self.as_str())
    }
}
/// Deserializable request to move a position from one genome build to another.
#[derive(Debug, Clone, Deserialize)]
pub struct LiftoverRequest {
/// The position as a string; the accepted syntax is defined by the handler (not shown here).
pub position: String,
/// Build the position is currently expressed in.
pub from_build: GenomeBuild,
/// Build to convert the position to.
pub to_build: GenomeBuild,
}
/// Serializable response of a liftover request.
#[derive(Debug, Serialize)]
pub struct LiftoverResponse {
/// The input string that was converted.
pub input: String,
/// Source build, as a string.
pub from_build: String,
/// Target build, as a string.
pub to_build: String,
/// The converted position, if any; serialized even when `None` (no skip attribute).
pub converted: Option<String>,
/// Genomic HGVS expression (per the field name); left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub hgvs_g: Option<String>,
/// Chain region as a string; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub chain_region: Option<String>,
/// Error text; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
/// Processing time in milliseconds (per the field name).
pub processing_time_ms: u64,
}
/// Deserializable request to express a VCF-style record as HGVS.
#[derive(Debug, Clone, Deserialize)]
pub struct VcfToHgvsRequest {
    /// Chromosome name.
    pub chrom: String,
    /// Position on the chromosome; the numbering convention is defined by the handler (not shown here).
    pub pos: u64,
    /// Reference allele; read from the key `ref`.
    #[serde(rename = "ref")]
    pub ref_allele: String,
    /// Alternate allele.
    pub alt: String,
    /// Genome build; GRCh38 when absent from the input.
    #[serde(default = "default_grch38")]
    pub build: GenomeBuild,
    /// Optional transcript identifier; `None` when absent from the input.
    // This struct is only ever deserialized, so the former
    // `skip_serializing_if` attribute had no effect; `default` states the
    // actual intent (a missing key becomes `None`).
    #[serde(default)]
    pub transcript: Option<String>,
}
/// Serde default for the `build` fields of `VcfToHgvsRequest` and `HgvsToVcfRequest`: GRCh38.
fn default_grch38() -> GenomeBuild {
GenomeBuild::GRCh38
}
/// Deserializable request to express an HGVS string as a VCF-style record.
#[derive(Debug, Clone, Deserialize)]
pub struct HgvsToVcfRequest {
/// The HGVS expression to convert.
pub hgvs: String,
/// Genome build; GRCh38 (via `default_grch38`) when absent from the input.
#[serde(default = "default_grch38")]
pub build: GenomeBuild,
}
/// Serializable response of a VCF-to-HGVS request.
#[derive(Debug, Serialize)]
pub struct VcfToHgvsResponse {
/// The VCF-style record the response refers to.
pub vcf: VcfRecord,
/// Genomic HGVS expression (per the field name); serialized even when `None` (no skip attribute).
pub hgvs_g: Option<String>,
/// Coding HGVS expression (per the field name); left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub hgvs_c: Option<String>,
/// Protein HGVS expression (per the field name); left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub hgvs_p: Option<String>,
/// Error text; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
/// Processing time in milliseconds (per the field name).
pub processing_time_ms: u64,
}
/// Serializable response of an HGVS-to-VCF request.
#[derive(Debug, Serialize)]
pub struct HgvsToVcfResponse {
/// The input string that was converted.
pub input: String,
/// The resulting VCF-style record, if any; serialized even when `None` (no skip attribute).
pub vcf: Option<VcfRecord>,
/// Error text; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
/// Processing time in milliseconds (per the field name).
pub processing_time_ms: u64,
}
/// VCF-style description of a variant; both serializable and deserializable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VcfRecord {
/// Chromosome name.
pub chrom: String,
/// Position on the chromosome; the numbering convention is not visible here.
pub pos: u64,
/// Reference allele; stored under the key `ref`.
#[serde(rename = "ref")]
pub ref_allele: String,
/// Alternate allele.
pub alt: String,
/// Genome build as a plain string (not a `GenomeBuild`).
pub build: String,
}
/// Types describing tool health probes and their outcomes.
pub mod health_check {
    use super::*;

    /// Outcome of a health probe.
    #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
    pub enum HealthCheckResult {
        /// The probe succeeded without reservations.
        Healthy,
        /// Usable, with a stated reason for the degradation.
        Degraded { reason: String },
        /// Not usable, with a stated reason.
        Unhealthy { reason: String },
    }

    impl HealthCheckResult {
        /// Returns `true` for `Healthy` and `Degraded`, `false` for `Unhealthy`.
        pub fn is_available(&self) -> bool {
            !matches!(self, Self::Unhealthy { .. })
        }

        /// Returns the lowercase name of the outcome; the reason is not included.
        pub fn status_string(&self) -> &'static str {
            match self {
                Self::Healthy => "healthy",
                Self::Degraded { .. } => "degraded",
                Self::Unhealthy { .. } => "unhealthy",
            }
        }
    }

    /// Asynchronous health probe for a single tool.
    #[async_trait::async_trait]
    pub trait HealthChecker {
        /// Runs the probe and reports its outcome.
        async fn check_availability(&self) -> HealthCheckResult;
        /// Identifies the tool this checker probes.
        fn tool_name(&self) -> ToolName;
    }

    /// Parameters of a health probe.
    #[derive(Debug, Clone)]
    pub struct HealthCheckConfig {
        /// Variant string used as the probe input.
        pub test_variant: String,
        /// Probe timeout, in seconds per the field name.
        pub timeout_seconds: u64,
        /// Behaviours listed as expected; how they are evaluated is up to the checker.
        pub expected_behaviors: Vec<ExpectedBehavior>,
    }

    /// A behaviour that a health probe may list as expected.
    #[derive(Debug, Clone)]
    pub enum ExpectedBehavior {
        Success,
        /// Failure falling into one of the listed categories.
        AcceptableFailure(Vec<StructuredErrorCategory>),
        Responsive,
    }

    impl Default for HealthCheckConfig {
        /// Probe `NM_000001.2:c.1A>G` with a 10 second timeout, listing success,
        /// three reference/validation failure categories, and responsiveness.
        fn default() -> Self {
            let tolerated_failures = vec![
                StructuredErrorCategory::Reference(ReferenceErrorKind::SequenceNotFound),
                StructuredErrorCategory::Reference(ReferenceErrorKind::TranscriptNotFound),
                StructuredErrorCategory::Validation(ValidationErrorKind::UnsupportedVariant),
            ];
            HealthCheckConfig {
                test_variant: String::from("NM_000001.2:c.1A>G"),
                timeout_seconds: 10,
                expected_behaviors: vec![
                    ExpectedBehavior::Success,
                    ExpectedBehavior::AcceptableFailure(tolerated_failures),
                    ExpectedBehavior::Responsive,
                ],
            }
        }
    }
}