use url::Url;
use crate::benchmark::{
biocommons::{normalize_single, BiocommonsLocalConfig},
types::ParseResult,
};
use crate::service::{
config::BiocommonsConfig,
tools::HgvsToolService,
types::{analyze_error_structured, health_check::HealthCheckResult, ServiceError, ToolName},
};
/// HGVS tool service that runs variants through the biocommons
/// `normalize_single` helper.
pub struct BiocommonsService {
/// Copy of the service configuration this instance was built from. Stored by
/// `new`; not read by the code shown here.
_config: BiocommonsConfig,
/// Local runner settings assembled in `new`: container name, image tag, UTA
/// port, SeqRepo directory and SeqRepo instance.
biocommons_config: BiocommonsLocalConfig,
/// UTA schema name; `run_biocommons` appends it as the last path segment of
/// the database URL it builds.
uta_schema: String,
}
impl BiocommonsService {
    /// Builds a service from `config`.
    ///
    /// The UTA port is taken from `config.uta_url` after it passes
    /// `extract_port_from_url`. The container name falls back to
    /// `"ferro-uta"` when `config.docker_container` is `None`.
    ///
    /// # Errors
    ///
    /// Returns `ServiceError::ConfigError` when the UTA URL is rejected or
    /// when `config.seqrepo_path` does not exist. The URL is checked first.
    pub fn new(config: &BiocommonsConfig) -> Result<Self, ServiceError> {
        let uta_port = extract_port_from_url(&config.uta_url)?;
        let uta_container_name = match &config.docker_container {
            Some(name) => name.clone(),
            None => "ferro-uta".to_string(),
        };

        let biocommons_config = BiocommonsLocalConfig {
            uta_container_name,
            uta_image_tag: "uta_20210129b".to_string(),
            uta_port,
            seqrepo_dir: config.seqrepo_path.clone(),
            seqrepo_instance: "2021-01-29".to_string(),
        };

        let seqrepo_path = &config.seqrepo_path;
        if seqrepo_path.exists() {
            Ok(Self {
                _config: config.clone(),
                biocommons_config,
                uta_schema: config.uta_schema.clone(),
            })
        } else {
            Err(ServiceError::ConfigError(format!(
                "SeqRepo path does not exist: {}",
                seqrepo_path.display()
            )))
        }
    }

    /// Runs one HGVS string through `normalize_single` on a blocking thread.
    ///
    /// `_is_normalize` is accepted but never consulted, so the same call is
    /// made whichever value is passed. A failure reported by
    /// `normalize_single` is not returned as `Err`: it becomes an
    /// unsuccessful `ParseResult` carrying the error text and the category
    /// `"biocommons_error"`.
    ///
    /// # Errors
    ///
    /// Returns `ServiceError::InternalError` when the blocking task cannot be
    /// joined.
    async fn run_biocommons(
        &self,
        hgvs: &str,
        _is_normalize: bool,
    ) -> Result<ParseResult, ServiceError> {
        // The blocking closure must own everything it touches.
        let input = hgvs.to_string();
        let local = self.biocommons_config.clone();
        let schema = self.uta_schema.clone();

        let task = tokio::task::spawn_blocking(move || -> Result<ParseResult, ServiceError> {
            let uta_db_url = format!(
                "postgresql://anonymous:anonymous@localhost:{}/uta/{}",
                local.uta_port, schema
            );
            let seqrepo_dir = local.seqrepo_dir.to_string_lossy();

            let attempt = normalize_single(&input, Some(&uta_db_url), Some(&seqrepo_dir), None);

            // Tool failures are reported in-band so callers always get a ParseResult.
            let report = attempt.unwrap_or_else(|e| ParseResult {
                input,
                success: false,
                output: None,
                error: Some(e.to_string()),
                error_category: Some("biocommons_error".to_string()),
                ref_mismatch: None,
                details: None,
            });
            Ok(report)
        });

        match task.await {
            Ok(outcome) => outcome,
            Err(join_err) => Err(ServiceError::InternalError(format!(
                "Task join error: {}",
                join_err
            ))),
        }
    }
}
#[async_trait::async_trait]
impl HgvsToolService for BiocommonsService {
async fn parse(&self, hgvs: &str) -> Result<ParseResult, ServiceError> {
self.run_biocommons(hgvs, false).await
}
async fn normalize(&self, hgvs: &str) -> Result<ParseResult, ServiceError> {
self.run_biocommons(hgvs, true).await
}
async fn health_check(&self) -> HealthCheckResult {
let test_variant = "NM_000088.3:c.589G>T";
match self.run_biocommons(test_variant, false).await {
Ok(result) => {
if result.success {
HealthCheckResult::Healthy
} else if let Some(error) = &result.error {
let error_category = analyze_error_structured(ToolName::Biocommons, error);
match error_category {
crate::service::types::StructuredErrorCategory::Reference(_) => {
HealthCheckResult::Degraded {
reason: "Reference database issues, but tool is responding"
.to_string(),
}
}
crate::service::types::StructuredErrorCategory::Validation(_) => {
HealthCheckResult::Degraded {
reason: "Tool validation rules active, service operational"
.to_string(),
}
}
crate::service::types::StructuredErrorCategory::Parse(_)
| crate::service::types::StructuredErrorCategory::Tool(_)
| crate::service::types::StructuredErrorCategory::Internal => {
HealthCheckResult::Unhealthy {
reason: format!("Tool error: {}", error),
}
}
crate::service::types::StructuredErrorCategory::Timeout => {
HealthCheckResult::Unhealthy {
reason: "Tool not responding within timeout".to_string(),
}
}
}
} else {
HealthCheckResult::Unhealthy {
reason: "Tool failed without error message".to_string(),
}
}
}
Err(e) => HealthCheckResult::Unhealthy {
reason: format!("Service error: {}", e),
},
}
}
fn tool_name(&self) -> ToolName {
ToolName::Biocommons
}
}
/// Validates a database URL and returns the port it names.
///
/// A URL is accepted when it uses the `postgresql` scheme, its host is
/// `localhost` or `127.0.0.1`, and its port is 1024 or above. A URL without
/// an explicit port is treated as port 5432.
///
/// # Errors
///
/// Returns `ServiceError::ConfigError` when the string cannot be parsed as a
/// URL, or when the scheme, host, or port check fails (checked in that
/// order).
fn extract_port_from_url(url: &str) -> Result<u16, ServiceError> {
    let parsed = match Url::parse(url) {
        Ok(parsed) => parsed,
        Err(e) => {
            return Err(ServiceError::ConfigError(format!(
                "Invalid URL format '{}': {}",
                url, e
            )));
        }
    };

    let scheme = parsed.scheme();
    if scheme != "postgresql" {
        return Err(ServiceError::ConfigError(format!(
            "Only postgresql URLs are allowed, got scheme: {}",
            scheme
        )));
    }

    // A URL with no host is reported with an empty host name.
    let host = parsed.host_str().unwrap_or_default();
    if !matches!(host, "localhost" | "127.0.0.1") {
        return Err(ServiceError::ConfigError(format!(
            "Only localhost connections are allowed for security, got host: {}",
            host
        )));
    }

    match parsed.port().unwrap_or(5432) {
        port if port < 1024 => Err(ServiceError::ConfigError(format!(
            "Port must be >= 1024 for security, got: {}",
            port
        ))),
        port => Ok(port),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Accepted URLs yield their explicit port, or 5432 when none is given.
    #[test]
    fn test_extract_port_from_url() {
        let accepted: [(&str, u16); 3] = [
            (
                "postgresql://anonymous:anonymous@localhost:5432/uta/uta_20210129b",
                5432,
            ),
            ("postgresql://user:pass@127.0.0.1:1234/db", 1234),
            ("postgresql://user:pass@localhost/db", 5432),
        ];

        for &(url, expected_port) in accepted.iter() {
            assert_eq!(
                extract_port_from_url(url).unwrap(),
                expected_port,
                "unexpected port for {}",
                url
            );
        }
    }

    /// URLs that fail the scheme, host, port, or parse checks are rejected.
    #[test]
    fn test_extract_port_from_url_security_validation() {
        let rejected = [
            // Scheme other than postgresql.
            "mysql://user:pass@localhost:3306/db",
            // Host that is not local.
            "postgresql://user:pass@example.com:5432/db",
            // Port below 1024.
            "postgresql://user:pass@localhost:22/db",
            // Not a URL at all.
            "not-a-valid-url",
        ];

        for &url in rejected.iter() {
            assert!(
                extract_port_from_url(url).is_err(),
                "expected {} to be rejected",
                url
            );
        }
    }

    /// Construction fails when the configured SeqRepo path does not exist.
    #[test]
    fn test_biocommons_service_creation() {
        let config = BiocommonsConfig {
            enabled: true,
            uta_url: String::from(
                "postgresql://anonymous:anonymous@localhost:5432/uta/uta_20210129b",
            ),
            uta_schema: String::from("uta_20210129b"),
            seqrepo_path: PathBuf::from("/nonexistent/seqrepo/path"),
            docker_container: Some(String::from("test-uta")),
            parallel_workers: Some(1),
            env_vars: None,
        };

        assert!(BiocommonsService::new(&config).is_err());
    }
}