use std::path::PathBuf;
use std::sync::Arc;
use axum::{
Json, Router,
extract::{Path, Query, State},
http::{HeaderMap, StatusCode, header},
response::{IntoResponse, Redirect, Response},
routing::{get, post},
};
use clap::{Args, Parser, Subcommand};
use serde::{Deserialize, Serialize};
use std::net::SocketAddr;
use tokio::signal::unix::{SignalKind, signal};
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
mod db;
mod error;
mod negotiate;
use error::AppError;
use negotiate::NegotiateResult;
// Top-level command-line interface, parsed once in `main` via `Cli::parse()`.
// NOTE: plain `//` comments are used on the clap types on purpose; `///` doc comments are
// read by clap's derive macros and would alter the generated help text.
#[derive(Parser)]
#[command(name = "dragoman", about = "PID redirection and content negotiation server")]
struct Cli {
// The subcommand selected on the command line; `main` dispatches on it.
#[command(subcommand)]
command: Command,
}
// Subcommands understood by the binary.
#[derive(Subcommand)]
enum Command {
// Run the HTTP server (handled by `cmd_start`).
Start(StartArgs),
// Signal a running server to terminate, using its PID file (handled by `cmd_stop`).
Stop(StopArgs),
}
// Arguments of the `Start` subcommand. Every option can also be supplied through the
// environment variable named in its `env = "..."` attribute.
#[derive(Args)]
struct StartArgs {
// TCP port the server binds on 0.0.0.0 (see `cmd_start`); defaults to 3456.
#[arg(short, long, env = "PORT", default_value_t = 3456)]
port: u16,
// Path handed to `db::open`; when absent, `platform_default_db_path()` is used.
#[arg(short, long, env = "COMMONMETA_DB")]
db: Option<PathBuf>,
// Path handed to `db::cache_open`; when absent, `platform_default_cache_path()` is used.
#[arg(long, env = "DRAGOMAN_CACHE_DB")]
cache_db: Option<PathBuf>,
// When set, the server writes its process ID to this file on startup and removes it again
// on shutdown. There is no default here, so no PID file is written unless one is requested.
#[arg(long, env = "DRAGOMAN_PID_FILE")]
pid_file: Option<PathBuf>,
}
// Arguments of the `Stop` subcommand.
#[derive(Args)]
struct StopArgs {
// File the target process ID is read from; defaults to `/tmp/dragoman.pid`.
// NOTE: `StartArgs::pid_file` has no default, so this default only finds a server that was
// started with the same path passed explicitly (flag or `DRAGOMAN_PID_FILE`).
#[arg(long, env = "DRAGOMAN_PID_FILE", default_value = "/tmp/dragoman.pid")]
pid_file: PathBuf,
}
#[tokio::main]
async fn main() {
tracing_subscriber::registry()
.with(
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| "dragoman=info".into()),
)
.with(tracing_subscriber::fmt::layer())
.init();
let cli = Cli::parse();
match cli.command {
Command::Start(args) => cmd_start(args).await,
Command::Stop(args) => cmd_stop(args),
}
}
/// Returns the default location of the commonmeta SQLite database for the target OS.
///
/// * macOS: `$HOME/Library/Application Support/commonmeta/commonmeta.sqlite3`
///   (an unset `HOME` is treated as the empty string)
/// * Linux: `/var/lib/commonmeta/commonmeta.sqlite3`
/// * anything else: `commonmeta.sqlite3`, relative to the working directory
fn platform_default_db_path() -> PathBuf {
    // `cfg!` is resolved at compile time, so only the branch for the target OS ever runs.
    if cfg!(target_os = "macos") {
        let home_dir = std::env::var("HOME").unwrap_or_default();
        let location = format!(
            "{}/Library/Application Support/commonmeta/commonmeta.sqlite3",
            home_dir
        );
        PathBuf::from(location)
    } else if cfg!(target_os = "linux") {
        PathBuf::from("/var/lib/commonmeta/commonmeta.sqlite3")
    } else {
        PathBuf::from("commonmeta.sqlite3")
    }
}
/// Returns the default cache database path: `cache.sqlite3` placed next to the default
/// commonmeta database.
fn platform_default_cache_path() -> PathBuf {
    let mut location = platform_default_db_path();
    location.set_file_name("cache.sqlite3");
    location
}
async fn cmd_start(args: StartArgs) {
let db_path = args.db.unwrap_or_else(platform_default_db_path);
let database = {
let path = db_path.clone();
match tokio::task::spawn_blocking(move || db::open(&path))
.await
.expect("spawn_blocking panicked")
{
Ok(Some(p)) => {
tracing::info!(path = %p.display(), "using local sqlite database");
Some(Arc::new(p))
}
Ok(None) => None,
Err(e) => {
tracing::error!(error = %e, "failed to open database");
std::process::exit(1);
}
}
};
let cache_path = args.cache_db.unwrap_or_else(platform_default_cache_path);
let cache = {
let path = cache_path.clone();
match tokio::task::spawn_blocking(move || db::cache_open(&path))
.await
.expect("spawn_blocking panicked")
{
Ok(p) => {
tracing::info!(path = %p.display(), "using metadata cache database");
Some(Arc::new(p))
}
Err(e) => {
tracing::warn!(path = %cache_path.display(), error = %e, "cache database unavailable, caching disabled");
None
}
}
};
if let Some(ref path) = args.pid_file {
if let Err(e) = std::fs::write(path, std::process::id().to_string()) {
tracing::error!(path = %path.display(), error = %e, "failed to write PID file");
std::process::exit(1);
}
tracing::info!(path = %path.display(), "wrote PID file");
}
let app = build_app(AppState { db: database, cache });
let addr = SocketAddr::from(([0, 0, 0, 0], args.port));
let listener = match tokio::net::TcpListener::bind(addr).await {
Ok(l) => l,
Err(e) => {
tracing::error!(port = args.port, error = %e, "failed to bind");
if let Some(ref path) = args.pid_file {
std::fs::remove_file(path).ok();
}
std::process::exit(1);
}
};
tracing::info!("listening on {addr}");
let pid_file = args.pid_file.clone();
let shutdown = async move {
let mut sigterm = match signal(SignalKind::terminate()) {
Ok(s) => s,
Err(e) => {
tracing::error!(error = %e, "failed to register SIGTERM handler");
std::process::exit(1);
}
};
tokio::select! {
_ = tokio::signal::ctrl_c() => {}
_ = sigterm.recv() => {}
}
if let Some(ref path) = pid_file {
std::fs::remove_file(path).ok();
tracing::info!(path = %path.display(), "removed PID file");
}
};
if let Err(e) = axum::serve(listener, app)
.with_graceful_shutdown(shutdown)
.await
{
tracing::error!(error = %e, "server error");
std::process::exit(1);
}
}
/// Runs the `stop` subcommand: reads a process ID from the PID file and asks that process to
/// terminate by invoking the external `kill` command (which sends SIGTERM by default).
///
/// Any failure (unreadable file, invalid PID, `kill` failing or not runnable) is reported on
/// stderr and ends the process with exit code 1. On success the PID file is removed.
fn cmd_stop(args: StopArgs) {
    let pid_str = match std::fs::read_to_string(&args.pid_file) {
        Ok(s) => s,
        Err(e) => {
            eprintln!(
                "error: cannot read PID file '{}': {e}",
                args.pid_file.display()
            );
            std::process::exit(1);
        }
    };
    // Parse as a non-zero integer: `kill 0` targets the caller's whole process group, so a
    // PID file containing `0` must be rejected rather than passed through.
    let pid: std::num::NonZeroU32 = match pid_str.trim().parse() {
        Ok(p) => p,
        Err(_) => {
            eprintln!(
                "error: PID file '{}' does not contain a valid PID",
                args.pid_file.display()
            );
            std::process::exit(1);
        }
    };
    let status = std::process::Command::new("kill")
        .arg(pid.to_string())
        .status();
    match status {
        Ok(s) if s.success() => {
            println!("sent SIGTERM to process {pid}");
            // Best effort: the server also removes the file itself during shutdown.
            std::fs::remove_file(&args.pid_file).ok();
        }
        Ok(_) => {
            eprintln!("error: kill({pid}) failed — process may have already exited");
            std::process::exit(1);
        }
        Err(e) => {
            eprintln!("error: failed to run kill: {e}");
            std::process::exit(1);
        }
    }
}
/// Shared state handed to every handler through axum's `State` extractor.
///
/// Both fields hold filesystem paths (not open connections); handlers pass them to the
/// `db::*` functions on each request.
#[derive(Clone)]
struct AppState {
/// Path returned by `db::open`, or `None` when the server runs without a local database.
db: Option<Arc<PathBuf>>,
/// Path returned by `db::cache_open`, or `None` when caching is disabled.
cache: Option<Arc<PathBuf>>,
}
/// Query-string parameters accepted by the PID handlers; all are optional.
#[derive(Deserialize)]
struct PidQuery {
/// Output format name, resolved with `negotiate::content_type_for_format`.
format: Option<String>,
/// Explicit metadata source; overrides the registration-agency lookup in `resolve_source`.
source: Option<String>,
/// Style passed through to `commonmeta::write_with_style`.
style: Option<String>,
/// Locale passed through to `commonmeta::write_with_style`.
locale: Option<String>,
}
/// Serves the UI page (`ui/dist/index.html`, embedded at compile time) as HTML.
async fn index() -> impl IntoResponse {
    let page: &'static str = include_str!("../ui/dist/index.html");
    let headers = [(header::CONTENT_TYPE, "text/html; charset=utf-8")];
    (headers, page)
}
/// Catch-all handler (mounted on `/{*path}`) that resolves a ROR ID, an ORCID iD or a DOI.
///
/// * ROR / ORCID paths (as recognised by `is_ror_id` / `is_orcid_id`): when HTML is wanted the
///   entity page is rendered inline; otherwise the client is redirected to `/ror/{id}` or
///   `/orcid/{id}`.
/// * Any other path must validate as a DOI, else `AppError::NotFound` is returned. The output
///   format is taken from `?format=` when present, otherwise negotiated from `Accept`.
///
/// # Errors
/// `AppError::NotFound` for a path that is not a valid DOI, `AppError::UnsupportedFormat` for an
/// unknown `format` or an `Accept` header that cannot be satisfied, plus whatever the fetch and
/// conversion helpers return.
async fn handle_pid(
State(state): State<AppState>,
Path(path): Path<String>,
Query(query): Query<PidQuery>,
headers: HeaderMap,
) -> Result<Response, AppError> {
// A missing or non-UTF-8 Accept header is treated as `text/html`.
let accept = headers
.get(header::ACCEPT)
.and_then(|v| v.to_str().ok())
.unwrap_or("text/html");
// HTML is only served when no explicit `format` was requested and negotiation yields
// `NegotiateResult::Redirect`.
let wants_html = query.format.is_none()
&& matches!(negotiate::negotiate(accept), NegotiateResult::Redirect);
if is_ror_id(&path) {
if wants_html {
let ror_url = format!("https://ror.org/{path}");
let (org_data, org_types, works) =
fetch_ror_data(ror_url, state.db.as_deref().cloned()).await?;
let bytes = ror_to_commonmeta_json(&org_data, &org_types, &works)?;
return entity_page_html(bytes);
}
// NOTE(review): the redirect target carries no query string, so `format`, `style` and
// `locale` from the original request are not forwarded.
return Ok(Redirect::temporary(&format!("/ror/{path}")).into_response());
}
if is_orcid_id(&path) {
if wants_html {
let orcid_url = format!("https://orcid.org/{path}");
let (person_data, works) =
fetch_orcid_data(orcid_url, state.db.as_deref().cloned()).await?;
let bytes = orcid_to_commonmeta_json(&person_data, &works)?;
return entity_page_html(bytes);
}
// NOTE(review): as for ROR, query parameters are dropped by this redirect.
return Ok(Redirect::temporary(&format!("/orcid/{path}")).into_response());
}
let doi = commonmeta::doi_utils::validate_doi(&path)
.ok_or_else(|| AppError::NotFound(path.clone()))?;
tracing::info!(doi = %doi, "resolving PID");
// Decide the output format, its content type and the optional style/locale.
let (format, content_type, style, locale): (
String,
&'static str,
Option<String>,
Option<String>,
) = if let Some(fmt) = &query.format {
// Explicit `?format=` wins over the Accept header.
let ct = negotiate::content_type_for_format(fmt)
.ok_or_else(|| AppError::UnsupportedFormat(fmt.clone()))?;
(fmt.clone(), ct, query.style.clone(), query.locale.clone())
} else {
match negotiate::negotiate(accept) {
// Style/locale from the negotiation result take precedence over the query parameters.
NegotiateResult::Format(n) => (
n.format.to_string(),
n.content_type,
n.style.or_else(|| query.style.clone()),
n.locale.or_else(|| query.locale.clone()),
),
// HTML request for a DOI: embed the commonmeta record in the UI page.
NegotiateResult::Redirect => {
let bytes = fetch_and_convert(
&doi,
"commonmeta",
query.source.as_deref(),
None,
None,
state.db.as_deref(),
state.cache.as_deref(),
)
.await?;
return entity_page_html(bytes);
}
NegotiateResult::NotAcceptable => {
return Err(AppError::UnsupportedFormat(accept.to_string()));
}
}
};
let bytes = fetch_and_convert(
&doi,
&format,
query.source.as_deref(),
style.as_deref(),
locale.as_deref(),
state.db.as_deref(),
state.cache.as_deref(),
)
.await?;
Ok((StatusCode::OK, [(header::CONTENT_TYPE, content_type)], bytes).into_response())
}
async fn fetch_and_convert(
doi: &str,
format: &str,
source: Option<&str>,
style: Option<&str>,
locale: Option<&str>,
db: Option<&PathBuf>,
cache: Option<&PathBuf>,
) -> Result<Vec<u8>, AppError> {
let doi = doi.to_string();
let format = format.to_string();
let source = source.map(str::to_string);
let style = style.map(str::to_string);
let locale = locale.map(str::to_string);
let db = db.cloned();
let cache = cache.cloned();
tokio::task::spawn_blocking(move || {
if let Some(ref path) = db {
if let Some(data) = db::lookup(path, &doi)? {
tracing::debug!(doi = %doi, "served from local sqlite");
let data = enrich_citations(data, &db, &doi);
return commonmeta::write_with_style(
&format,
&data,
style.as_deref(),
locale.as_deref(),
)
.map_err(|e| AppError::FetchError(e.to_string()));
}
}
if let Some(ref path) = cache {
if let Some(data) = db::cache_lookup(path, &doi)? {
tracing::debug!(doi = %doi, "served from cache sqlite");
let data = enrich_citations(data, &db, &doi);
return commonmeta::write_with_style(
&format,
&data,
style.as_deref(),
locale.as_deref(),
)
.map_err(|e| AppError::FetchError(e.to_string()));
}
}
let from = resolve_source(&doi, source.as_deref());
let data = commonmeta::read(&from, &doi)
.map_err(|e| AppError::FetchError(e.to_string()))?;
if let Some(ref path) = cache {
if let Err(e) = db::cache_insert(path, &data, &from) {
tracing::warn!(doi = %doi, error = %e, "failed to cache fetched metadata");
} else {
tracing::debug!(doi = %doi, "cached fetched metadata");
}
}
let data = enrich_citations(data, &db, &doi);
commonmeta::write_with_style(&format, &data, style.as_deref(), locale.as_deref())
.map_err(|e| AppError::FetchError(e.to_string()))
})
.await
.map_err(|e| AppError::Internal(e.to_string()))?
}
/// Replaces `data.citations` with the citing DOIs recorded in the local database, when there
/// are any.
///
/// Best effort: without a database, on a lookup error, or when the lookup returns nothing,
/// `data` is returned unchanged.
fn enrich_citations(mut data: commonmeta::Data, db: &Option<PathBuf>, doi: &str) -> commonmeta::Data {
    let citing = db
        .as_ref()
        .and_then(|path| db::lookup_citing_dois(path, doi).ok());
    match citing {
        Some(found) if !found.is_empty() => data.citations = found,
        _ => {}
    }
    data
}
/// JSON body accepted by `POST /bibliography`.
#[derive(Deserialize)]
struct BibliographyRequest {
/// Records to format; results are returned in the same order.
items: Vec<BibRequestItem>,
/// Optional style, passed to `commonmeta::write_with_style` for every item.
style: Option<String>,
/// Optional locale, passed to `commonmeta::write_with_style` for every item.
locale: Option<String>,
}
/// One record of a bibliography request.
#[derive(Deserialize)]
struct BibRequestItem {
/// Caller-chosen identifier, echoed back unchanged in the response item.
id: String,
/// The record itself as a string; `handle_bibliography` parses it as JSON and reads it with
/// `commonmeta::read("commonmeta", ..)`.
data: String, }
/// JSON body returned by `POST /bibliography`.
#[derive(Serialize)]
struct BibliographyResponse {
/// One entry per request item, in request order.
items: Vec<BibResponseItem>,
}
/// One formatted entry of a bibliography response.
#[derive(Serialize)]
struct BibResponseItem {
/// The `id` of the corresponding request item.
id: String,
/// Output of the `citation` writer (lossily decoded as UTF-8); empty for ROR and ORCID records.
html: String,
}
async fn handle_bibliography(
Json(req): Json<BibliographyRequest>,
) -> Result<Json<BibliographyResponse>, AppError> {
let style = req.style;
let locale = req.locale;
let items = req.items;
let results = tokio::task::spawn_blocking(move || {
items
.into_iter()
.map(|item| {
let first_id = serde_json::from_str::<serde_json::Value>(&item.data)
.ok()
.and_then(|v| {
v.as_array()?
.first()?
.get("id")?
.as_str()
.map(String::from)
})
.unwrap_or_default();
if first_id.starts_with("https://ror.org/")
|| first_id.starts_with("https://orcid.org/")
{
return Ok(BibResponseItem { id: item.id, html: String::new() });
}
let data = commonmeta::read("commonmeta", &item.data)
.map_err(|e| AppError::FetchError(e.to_string()))?;
let bytes = commonmeta::write_with_style(
"citation",
&data,
style.as_deref(),
locale.as_deref(),
)
.map_err(|e| AppError::FetchError(e.to_string()))?;
Ok(BibResponseItem {
id: item.id,
html: String::from_utf8_lossy(&bytes).into_owned(),
})
})
.collect::<Result<Vec<_>, AppError>>()
})
.await
.map_err(|e| AppError::Internal(e.to_string()))??;
Ok(Json(BibliographyResponse { items: results }))
}
/// `GET /pmid/{pmid}`: resolves a PubMed ID, preferring the local database and falling back
/// to the `pubmed` reader, then hands the record to `format_or_redirect`.
async fn handle_pmid(
    State(state): State<AppState>,
    Path(pmid): Path<String>,
    Query(query): Query<PidQuery>,
) -> Result<Response, AppError> {
    let local_db = state.db.as_deref().cloned();
    let resolve = move || -> Result<commonmeta::Data, AppError> {
        let stored = match local_db.as_ref() {
            Some(path) => db::lookup_by_pmid(path, &pmid)?,
            None => None,
        };
        match stored {
            Some(record) => Ok(record),
            None => {
                tracing::debug!(pmid = %pmid, "pmid fetching from europepmc");
                commonmeta::read("pubmed", &pmid).map_err(|e| AppError::FetchError(e.to_string()))
            }
        }
    };
    let record = tokio::task::spawn_blocking(resolve)
        .await
        .map_err(|e| AppError::Internal(e.to_string()))??;
    format_or_redirect(record, query.format.as_deref())
}
/// `GET /pmcid/{pmcid}`: resolves a PubMed Central ID, preferring the local database and
/// falling back to the `pubmed` reader, then hands the record to `format_or_redirect`.
async fn handle_pmcid(
    State(state): State<AppState>,
    Path(pmcid): Path<String>,
    Query(query): Query<PidQuery>,
) -> Result<Response, AppError> {
    let local_db = state.db.as_deref().cloned();
    let resolve = move || -> Result<commonmeta::Data, AppError> {
        let stored = match local_db.as_ref() {
            Some(path) => db::lookup_by_pmcid(path, &pmcid)?,
            None => None,
        };
        match stored {
            Some(record) => Ok(record),
            None => {
                tracing::debug!(pmcid = %pmcid, "pmcid fetching from europepmc");
                commonmeta::read("pubmed", &pmcid).map_err(|e| AppError::FetchError(e.to_string()))
            }
        }
    };
    let record = tokio::task::spawn_blocking(resolve)
        .await
        .map_err(|e| AppError::Internal(e.to_string()))??;
    format_or_redirect(record, query.format.as_deref())
}
/// `GET /arxiv/{id}`: resolves an arXiv identifier, preferring the local database and falling
/// back to the `datacite` reader with the DOI `10.48550/arXiv.{id}`, then hands the record to
/// `format_or_redirect`.
async fn handle_arxiv(
    State(state): State<AppState>,
    Path(arxiv_id): Path<String>,
    Query(query): Query<PidQuery>,
) -> Result<Response, AppError> {
    let local_db = state.db.as_deref().cloned();
    let resolve = move || -> Result<commonmeta::Data, AppError> {
        let stored = match local_db.as_ref() {
            Some(path) => db::lookup_by_arxiv(path, &arxiv_id)?,
            None => None,
        };
        match stored {
            Some(record) => Ok(record),
            None => {
                let doi = format!("10.48550/arXiv.{arxiv_id}");
                tracing::debug!(arxiv = %arxiv_id, doi = %doi, "arxiv fetching from datacite");
                commonmeta::read("datacite", &doi).map_err(|e| AppError::FetchError(e.to_string()))
            }
        }
    };
    let record = tokio::task::spawn_blocking(resolve)
        .await
        .map_err(|e| AppError::Internal(e.to_string()))??;
    format_or_redirect(record, query.format.as_deref())
}
/// Turns a resolved record into a response.
///
/// * With `fmt`: serialises `data` in that format (`AppError::UnsupportedFormat` when the
///   format has no content type, `AppError::FetchError` when writing fails).
/// * Without `fmt`: issues a temporary redirect to `/{suffix}`, where `suffix` is `data.id`
///   minus its `https://doi.org/`, `https://ror.org/` or `https://orcid.org/` prefix;
///   `AppError::NotFound` when the id has none of these prefixes.
fn format_or_redirect(data: commonmeta::Data, fmt: Option<&str>) -> Result<Response, AppError> {
    // Checked in this order; the first matching prefix wins.
    const KNOWN_PREFIXES: [&str; 3] = [
        "https://doi.org/",
        "https://ror.org/",
        "https://orcid.org/",
    ];

    if let Some(format) = fmt {
        let Some(content_type) = negotiate::content_type_for_format(format) else {
            return Err(AppError::UnsupportedFormat(format.to_string()));
        };
        let body = commonmeta::write_with_style(format, &data, None, None)
            .map_err(|e| AppError::FetchError(e.to_string()))?;
        let response = (StatusCode::OK, [(header::CONTENT_TYPE, content_type)], body);
        return Ok(response.into_response());
    }

    let local_part = KNOWN_PREFIXES
        .iter()
        .find_map(|prefix| data.id.strip_prefix(*prefix));
    if let Some(suffix) = local_part {
        return Ok(Redirect::temporary(&format!("/{suffix}")).into_response());
    }
    Err(AppError::NotFound(data.id))
}
async fn handle_openalex(
State(state): State<AppState>,
Path(id): Path<String>,
Query(query): Query<PidQuery>,
) -> Result<Response, AppError> {
let format = query.format.as_deref().unwrap_or("commonmeta").to_string();
let content_type = negotiate::content_type_for_format(&format)
.ok_or_else(|| AppError::UnsupportedFormat(format.clone()))?;
let db = state.db.as_deref().cloned();
let cache = state.cache.as_deref().cloned();
let id2 = id.clone();
let fmt2 = format.clone();
let bytes = tokio::task::spawn_blocking(move || {
if let Some(ref path) = db {
if let Some(data) = db::lookup_by_openalex(path, &id2)? {
tracing::debug!(id = %id2, "openalex served from local sqlite");
return commonmeta::write_with_style(&fmt2, &data, None, None)
.map_err(|e| AppError::FetchError(e.to_string()));
}
}
tracing::debug!(id = %id2, "openalex fetching from remote");
let data = commonmeta::read("openalex", &id2)
.map_err(|e| AppError::FetchError(e.to_string()))?;
if let Some(ref path) = cache {
if let Err(e) = db::cache_insert(path, &data, "openalex") {
tracing::warn!(id = %id2, error = %e, "failed to cache openalex metadata");
}
}
commonmeta::write_with_style(&fmt2, &data, None, None)
.map_err(|e| AppError::FetchError(e.to_string()))
})
.await
.map_err(|e| AppError::Internal(e.to_string()))??;
Ok((StatusCode::OK, [(header::CONTENT_TYPE, content_type)], bytes).into_response())
}
/// Serialises an organisation and its works as one pretty-printed JSON array.
///
/// Element 0 is a hand-built organisation object (`id`, `name`, `acronym`,
/// `additional_names`, `established`, `country`, `urls`, `identifiers`, `types`; empty values
/// are omitted). Every following element is a work passed through
/// `commonmeta::prepare_commonmeta`.
///
/// `org_types`, when non-empty, takes precedence over `data.types`. Identifier types are
/// normalised: `Crossref Funder ID` becomes `FundRef`, and anything not in the allow-list
/// becomes `Other`.
fn ror_to_commonmeta_json(
    data: &commonmeta::Data,
    org_types: &[String],
    works: &[commonmeta::Data],
) -> Result<Vec<u8>, AppError> {
    use serde_json::{Map, Value};

    const VALID_ID_TYPES: &[&str] = &[
        "ARK", "arXiv", "article_id", "Bibcode", "DOI", "FundRef", "GRID", "Handle",
        "ISBN", "ISNI", "ISSN", "OpenAlex", "PMID", "PMCID", "PURL", "RAiD",
        "ResearcherID", "ROR", "ScopusID", "SWHID", "URL", "URN", "UUID", "GUID",
        "Wikidata", "Other",
    ];

    /// Inserts `text` under `key` unless it is empty.
    fn insert_text(target: &mut Map<String, Value>, key: &str, text: &str) {
        if !text.is_empty() {
            target.insert(key.to_string(), Value::String(text.to_string()));
        }
    }

    // Organisation object; the insertion order below mirrors the intended field order.
    let mut org = Map::new();
    org.insert("id".to_string(), Value::String(data.id.clone()));
    insert_text(&mut org, "name", &data.title);
    insert_text(&mut org, "acronym", &data.acronym);
    if !data.additional_names.is_empty() {
        org.insert("additional_names".to_string(), serde_json::json!(data.additional_names));
    }
    if let Some(year) = data.established {
        org.insert("established".to_string(), Value::Number(year.into()));
    }
    insert_text(&mut org, "country", &data.country);
    if !data.url.is_empty() {
        org.insert("urls".to_string(), serde_json::json!([{"url": data.url}]));
    }

    let mut identifiers: Vec<Value> = Vec::new();
    for entry in data.identifiers.iter() {
        if entry.identifier.is_empty() {
            continue;
        }
        let declared = entry.identifier_type.as_str();
        let normalized = if declared == "Crossref Funder ID" {
            "FundRef"
        } else if VALID_ID_TYPES.contains(&declared) {
            declared
        } else {
            "Other"
        };
        let mut record = Map::new();
        record.insert("identifier".to_string(), Value::String(entry.identifier.clone()));
        record.insert("identifier_type".to_string(), Value::String(normalized.to_string()));
        identifiers.push(Value::Object(record));
    }
    if !identifiers.is_empty() {
        org.insert("identifiers".to_string(), Value::Array(identifiers));
    }

    let types: &[String] = if org_types.is_empty() { &data.types } else { org_types };
    if !types.is_empty() {
        let listed = types.iter().cloned().map(Value::String).collect();
        org.insert("types".to_string(), Value::Array(listed));
    }

    let mut documents = Vec::with_capacity(works.len() + 1);
    documents.push(Value::Object(org));
    for work in works {
        let prepared = commonmeta::prepare_commonmeta(work);
        let encoded = serde_json::to_value(&prepared)
            .map_err(|e| AppError::FetchError(e.to_string()))?;
        documents.push(encoded);
    }
    serde_json::to_vec_pretty(&Value::Array(documents))
        .map_err(|e| AppError::FetchError(e.to_string()))
}
/// `GET /ror/{id}`: serves a ROR organisation record.
///
/// The organisation (plus its types and works) is always fetched first. The default
/// `commonmeta` format returns the combined JSON array built by `ror_to_commonmeta_json`;
/// any other format is delegated to `format_or_redirect` with the organisation record only.
async fn handle_ror(
    State(state): State<AppState>,
    Path(id): Path<String>,
    Query(query): Query<PidQuery>,
) -> Result<Response, AppError> {
    let ror_url = format!("https://ror.org/{id}");
    let local_db = state.db.as_deref().cloned();
    let (org_data, org_types, works) = fetch_ror_data(ror_url, local_db).await?;

    match query.format.as_deref().unwrap_or("commonmeta") {
        "commonmeta" => {
            let body = ror_to_commonmeta_json(&org_data, &org_types, &works)?;
            let headers = [(header::CONTENT_TYPE, "application/json; charset=utf-8")];
            Ok((StatusCode::OK, headers, body).into_response())
        }
        other => format_or_redirect(org_data, Some(other)),
    }
}
/// `GET /orcid/{id}`: serves an ORCID person record.
///
/// The default `commonmeta` format returns the person plus works as the JSON array built by
/// `orcid_to_commonmeta_json`. Any other format fetches only the person record and delegates
/// to `format_or_redirect`.
async fn handle_orcid(
    State(state): State<AppState>,
    Path(id): Path<String>,
    Query(query): Query<PidQuery>,
) -> Result<Response, AppError> {
    let orcid_url = format!("https://orcid.org/{id}");
    let requested = query.format.as_deref().unwrap_or("commonmeta");

    if requested != "commonmeta" {
        // Works are not needed for single-record formats, so only the person is fetched.
        let person = tokio::task::spawn_blocking(move || {
            commonmeta::fetch_orcid(&orcid_url).map_err(|e| AppError::FetchError(e.to_string()))
        })
        .await
        .map_err(|e| AppError::Internal(e.to_string()))??;
        return format_or_redirect(person, Some(requested));
    }

    let local_db = state.db.as_deref().cloned();
    let (person_data, works) = fetch_orcid_data(orcid_url, local_db).await?;
    let body = orcid_to_commonmeta_json(&person_data, &works)?;
    let headers = [(header::CONTENT_TYPE, "application/json; charset=utf-8")];
    Ok((StatusCode::OK, headers, body).into_response())
}
/// `GET /dois/{*path}`: resolves the DOI in `path` and always answers in `datacite` format.
async fn handle_dois(
    State(app_state): State<AppState>,
    Path(doi_path): Path<String>,
    Query(params): Query<PidQuery>,
) -> Result<Response, AppError> {
    const OUTPUT_FORMAT: &str = "datacite";
    handle_pid_with_format(app_state, doi_path, params, OUTPUT_FORMAT).await
}
/// `GET /works/{*path}`: resolves the DOI in `path` and always answers in `crossref` format.
async fn handle_works(
    State(app_state): State<AppState>,
    Path(doi_path): Path<String>,
    Query(params): Query<PidQuery>,
) -> Result<Response, AppError> {
    const OUTPUT_FORMAT: &str = "crossref";
    handle_pid_with_format(app_state, doi_path, params, OUTPUT_FORMAT).await
}
/// Shared implementation of the fixed-format DOI routes.
///
/// Validates `path` as a DOI (`AppError::NotFound` otherwise), resolves the content type of
/// `format` (`AppError::UnsupportedFormat` otherwise), then fetches and converts the record.
/// Only `query.source` is honoured; style and locale are not forwarded.
async fn handle_pid_with_format(
    state: AppState,
    path: String,
    query: PidQuery,
    format: &'static str,
) -> Result<Response, AppError> {
    let Some(doi) = commonmeta::doi_utils::validate_doi(&path) else {
        return Err(AppError::NotFound(path));
    };
    let Some(content_type) = negotiate::content_type_for_format(format) else {
        return Err(AppError::UnsupportedFormat(format.to_string()));
    };
    tracing::info!(doi = %doi, format, "resolving PID");

    let source = query.source.as_deref();
    let local_db = state.db.as_deref();
    let cache_db = state.cache.as_deref();
    let body = fetch_and_convert(&doi, format, source, None, None, local_db, cache_db).await?;

    Ok((StatusCode::OK, [(header::CONTENT_TYPE, content_type)], body).into_response())
}
/// Builds the router with every route registered and `state` attached.
fn build_app(state: AppState) -> Router {
    // Fixed, non-identifier endpoints.
    let router: Router<AppState> = Router::new()
        .route("/", get(index))
        .route("/commonmeta_v1.0.json", get(schema_json))
        .route("/status", get(handle_status))
        .route("/random", get(handle_random))
        .route("/bibliography", post(handle_bibliography));

    // Identifier-scheme specific endpoints.
    let router = router
        .route("/pmid/{pmid}", get(handle_pmid))
        .route("/pmcid/{pmcid}", get(handle_pmcid))
        .route("/arxiv/{id}", get(handle_arxiv))
        .route("/openalex/{id}", get(handle_openalex))
        .route("/ror/{id}", get(handle_ror))
        .route("/orcid/{id}", get(handle_orcid));

    // DOI endpoints, ending with the catch-all resolver.
    let router = router
        .route("/dois/{*path}", get(handle_dois))
        .route("/works/{*path}", get(handle_works))
        .route("/{*path}", get(handle_pid));

    router.with_state(state)
}
async fn schema_json() -> impl axum::response::IntoResponse {
(
[(axum::http::header::CONTENT_TYPE, "application/json")],
commonmeta::SCHEMA_JSON,
)
}
/// `GET /status`: reports as `{"db": <bool>}` whether a local database is configured.
async fn handle_status(
    State(app_state): State<AppState>,
) -> Json<serde_json::Value> {
    let has_db = app_state.db.is_some();
    Json(serde_json::json!({ "db": has_db }))
}
/// `GET /random`: returns `{"id": ...}` for a random record of the local database.
///
/// Answers `AppError::NotFound` when no database is configured or `db::random_id` yields
/// nothing.
async fn handle_random(
    State(state): State<AppState>,
) -> Result<Json<serde_json::Value>, AppError> {
    let db_path = state
        .db
        .ok_or_else(|| AppError::NotFound("no local database".to_string()))?;
    let picked = tokio::task::spawn_blocking(move || db::random_id(&db_path))
        .await
        .map_err(|e| AppError::Internal(e.to_string()))??;
    picked
        .map(|id| Json(serde_json::json!({ "id": id })))
        .ok_or_else(|| AppError::NotFound("no records found".to_string()))
}
/// Returns `true` when `s` has the shape of a bare ROR identifier (the part after
/// `https://ror.org/`).
///
/// A ROR ID is exactly nine characters: a leading `0`, six lowercase base32 characters
/// (digits and letters, excluding `i`, `l`, `o` and `u`), and a two-digit checksum. Only the
/// shape is checked here; the checksum value is not verified.
///
/// Checking the full shape, rather than "any nine lowercase alphanumerics", keeps ordinary
/// nine-letter paths such as `aboutpage` from being treated as ROR IDs.
fn is_ror_id(s: &str) -> bool {
    let [b'0', body @ .., check_hi, check_lo] = s.as_bytes() else {
        return false;
    };
    let is_base32 = |&b: &u8| {
        b.is_ascii_digit() || (b.is_ascii_lowercase() && !matches!(b, b'i' | b'l' | b'o' | b'u'))
    };
    body.len() == 6
        && body.iter().all(is_base32)
        && check_hi.is_ascii_digit()
        && check_lo.is_ascii_digit()
}
/// Returns `true` when `s` has the shape of a bare ORCID iD: four hyphen-separated groups of
/// four ASCII digits, where the very last character may be an uppercase `X` instead of a digit.
///
/// Only the shape is checked; the checksum character is not verified.
fn is_orcid_id(s: &str) -> bool {
    let mut groups = s.split('-');
    let (Some(first), Some(second), Some(third), Some(last), None) = (
        groups.next(),
        groups.next(),
        groups.next(),
        groups.next(),
        groups.next(),
    ) else {
        return false;
    };

    let four_digits = |group: &str| group.len() == 4 && group.bytes().all(|b| b.is_ascii_digit());
    if !(four_digits(first) && four_digits(second) && four_digits(third)) {
        return false;
    }

    match last.as_bytes() {
        [d0, d1, d2, check] => {
            [d0, d1, d2].iter().all(|d| d.is_ascii_digit())
                && (check.is_ascii_digit() || *check == b'X')
        }
        _ => false,
    }
}
/// Escapes a JSON document so it can be embedded verbatim inside an HTML `<script>` element.
///
/// In valid JSON, `<`, `>`, `&`, U+2028 and U+2029 can only occur inside string literals, so
/// replacing them with their `\uXXXX` escapes leaves the decoded value unchanged. It also
/// guarantees the payload can never contain `</script>` or `<!--`.
fn escape_json_for_script(json: &str) -> String {
    let mut escaped = String::with_capacity(json.len());
    for ch in json.chars() {
        match ch {
            '<' => escaped.push_str("\\u003c"),
            '>' => escaped.push_str("\\u003e"),
            '&' => escaped.push_str("\\u0026"),
            '\u{2028}' => escaped.push_str("\\u2028"),
            '\u{2029}' => escaped.push_str("\\u2029"),
            other => escaped.push(other),
        }
    }
    escaped
}

/// Renders the UI page with `json_bytes` preloaded as `window.__DRAGOMAN_INIT__`.
///
/// The payload is injected as an inline `<script>` immediately before the first `</head>` of
/// the embedded `index.html`. Because the JSON is built from remotely fetched metadata, it is
/// escaped with `escape_json_for_script` first so that a field containing `</script>` cannot
/// terminate the script element and inject markup.
///
/// # Errors
/// `AppError::Internal` when `json_bytes` is not valid UTF-8.
fn entity_page_html(json_bytes: Vec<u8>) -> Result<Response, AppError> {
    let html = include_str!("../ui/dist/index.html");
    let json_str = String::from_utf8(json_bytes).map_err(|e| AppError::Internal(e.to_string()))?;
    let payload = escape_json_for_script(&json_str);
    let script = format!("<script>window.__DRAGOMAN_INIT__={payload};</script>");
    let page = html.replacen("</head>", &format!("{script}</head>"), 1);
    Ok((
        StatusCode::OK,
        [(header::CONTENT_TYPE, "text/html; charset=utf-8")],
        page,
    )
        .into_response())
}
/// Serialises a person and their works as one pretty-printed JSON array: the person first,
/// then each work, every record passed through `commonmeta::prepare_commonmeta`.
///
/// Any serialisation failure is reported as `AppError::FetchError`.
fn orcid_to_commonmeta_json(
    person_data: &commonmeta::Data,
    works: &[commonmeta::Data],
) -> Result<Vec<u8>, AppError> {
    use serde_json::Value;

    let documents = std::iter::once(person_data)
        .chain(works.iter())
        .map(|record| {
            let prepared = commonmeta::prepare_commonmeta(record);
            serde_json::to_value(&prepared).map_err(|e| AppError::FetchError(e.to_string()))
        })
        .collect::<Result<Vec<Value>, AppError>>()?;

    serde_json::to_vec_pretty(&Value::Array(documents))
        .map_err(|e| AppError::FetchError(e.to_string()))
}
/// Fetches an ORCID person record together with up to 100 of their works.
///
/// Works come from the local database first; when that yields fewer than 100, one page of 50
/// from Crossref and one from DataCite are added. Failures of any works source are ignored
/// (treated as empty). The merged list is de-duplicated by `id` (first occurrence wins),
/// sorted by `date_published` descending and truncated to 100.
///
/// A failing person fetch is an `AppError::FetchError`; a failed blocking task is an
/// `AppError::Internal`.
async fn fetch_orcid_data(
    orcid_url: String,
    db: Option<PathBuf>,
) -> Result<(commonmeta::Data, Vec<commonmeta::Data>), AppError> {
    let job = move || {
        let person = commonmeta::fetch_orcid(&orcid_url)
            .map_err(|e| AppError::FetchError(e.to_string()))?;

        let mut works = match db.as_ref() {
            Some(path) => commonmeta::read_sqlite_by_orcid(&orcid_url, path).unwrap_or_default(),
            None => Vec::new(),
        };
        if works.len() < 100 {
            let from_crossref =
                commonmeta::fetch_crossref_by_orcid(&orcid_url, 50, 1).unwrap_or_default();
            let from_datacite =
                commonmeta::fetch_datacite_by_orcid(&orcid_url, 50, 1).unwrap_or_default();
            works.extend(from_crossref);
            works.extend(from_datacite);
        }

        let mut known_ids = std::collections::HashSet::new();
        works.retain(|work| known_ids.insert(work.id.clone()));
        works.sort_by(|lhs, rhs| rhs.date_published.cmp(&lhs.date_published));
        works.truncate(100);
        Ok((person, works))
    };

    tokio::task::spawn_blocking(job)
        .await
        .map_err(|e| AppError::Internal(e.to_string()))?
}
/// Fetches a ROR organisation record, its organisation types and up to 100 affiliated works.
///
/// The organisation comes from the local database when present there, otherwise from
/// `commonmeta::fetch_ror`. Types are only available from the local database. Works come from
/// the local database first; when that yields fewer than 100, one page of 50 from Crossref and
/// one from DataCite are added. Failures of the types lookup or any works source are ignored
/// (treated as empty). The merged list is de-duplicated by `id` (first occurrence wins),
/// sorted by `date_published` descending and truncated to 100.
async fn fetch_ror_data(
    ror_url: String,
    db: Option<PathBuf>,
) -> Result<(commonmeta::Data, Vec<String>, Vec<commonmeta::Data>), AppError> {
    let job = move || {
        let stored = match db.as_ref() {
            Some(path) => db::lookup_by_ror(path, &ror_url)?,
            None => None,
        };
        let organization = match stored {
            Some(record) => {
                tracing::debug!(id = %ror_url, "ror served from local sqlite");
                record
            }
            None => commonmeta::fetch_ror(&ror_url)
                .map_err(|e| AppError::FetchError(e.to_string()))?,
        };

        let org_types = match db.as_ref() {
            Some(path) => db::lookup_ror_types(path, &ror_url).unwrap_or_default(),
            None => Vec::new(),
        };

        let mut works = match db.as_ref() {
            Some(path) => commonmeta::read_sqlite_by_ror(&ror_url, path).unwrap_or_default(),
            None => Vec::new(),
        };
        if works.len() < 100 {
            let from_crossref =
                commonmeta::fetch_crossref_by_ror(&ror_url, 50, 1).unwrap_or_default();
            let from_datacite =
                commonmeta::fetch_datacite_by_ror(&ror_url, 50, 1).unwrap_or_default();
            works.extend(from_crossref);
            works.extend(from_datacite);
        }

        let mut known_ids = std::collections::HashSet::new();
        works.retain(|work| known_ids.insert(work.id.clone()));
        works.sort_by(|lhs, rhs| rhs.date_published.cmp(&lhs.date_published));
        works.truncate(100);
        Ok((organization, org_types, works))
    };

    tokio::task::spawn_blocking(job)
        .await
        .map_err(|e| AppError::Internal(e.to_string()))?
}
/// Chooses the reader used to fetch `doi`.
///
/// An explicit `source` wins and is returned lowercased. Otherwise the DOI's registration
/// agency is looked up: `datacite` (case-insensitive) selects `"datacite"`; every other
/// outcome, including a failed lookup, selects `"crossref"`.
fn resolve_source(doi: &str, source: Option<&str>) -> String {
    match source {
        Some(explicit) => explicit.to_lowercase(),
        None => {
            let agency = commonmeta::doi_utils::get_doi_ra_sync(doi, false)
                .as_deref()
                .map(str::to_lowercase);
            let reader = if agency.as_deref() == Some("datacite") {
                "datacite"
            } else {
                "crossref"
            };
            reader.to_string()
        }
    }
}
/// Router-level tests. Each test drives the full `Router` with a single request; none of
/// them needs network access because they either fail before any fetch or are answered from
/// a fixture database.
#[cfg(test)]
mod tests {
    use super::*;
    use axum::{body::Body, http::Request};
    use tower::ServiceExt;

    /// Upper bound used when buffering response bodies.
    const BODY_LIMIT: usize = 1024 * 1024;

    fn make_test_db() -> (tempfile::TempDir, Arc<PathBuf>) {
        let dir = tempfile::tempdir().unwrap();
        let path = db::tests::make_test_db(&dir.path().join("test.sqlite3"));
        (dir, Arc::new(path))
    }

    fn make_test_cache_db() -> (tempfile::TempDir, Arc<PathBuf>) {
        let dir = tempfile::tempdir().unwrap();
        let path = db::tests::make_test_cache_db(&dir.path().join("cache.sqlite3"));
        (dir, Arc::new(path))
    }

    fn app_no_db() -> Router {
        build_app(AppState { db: None, cache: None })
    }

    /// The returned `TempDir` must be kept alive for as long as the router is used.
    fn app_with_db() -> (tempfile::TempDir, Router) {
        let (dir, db) = make_test_db();
        (dir, build_app(AppState { db: Some(db), cache: None }))
    }

    /// The returned `TempDir` must be kept alive for as long as the router is used.
    fn app_with_cache_only() -> (tempfile::TempDir, Router) {
        let (dir, cache) = make_test_cache_db();
        (dir, build_app(AppState { db: None, cache: Some(cache) }))
    }

    /// Sends one GET request for `uri`, optionally with an `Accept` header.
    async fn send(app: Router, uri: &str, accept: Option<&str>) -> Response {
        let mut builder = Request::builder().uri(uri);
        if let Some(value) = accept {
            builder = builder.header("Accept", value);
        }
        app.oneshot(builder.body(Body::empty()).unwrap())
            .await
            .unwrap()
    }

    /// Returns the response's `content-type` header as text.
    fn content_type_of(response: &Response) -> &str {
        response
            .headers()
            .get("content-type")
            .unwrap()
            .to_str()
            .unwrap()
    }

    /// Asserts that the response's content type contains `needle`.
    fn assert_content_type(response: &Response, needle: &str) {
        let ct = content_type_of(response);
        assert!(ct.contains(needle), "unexpected content-type: {ct}");
    }

    /// Buffers the whole response body.
    async fn body_of(response: Response) -> axum::body::Bytes {
        axum::body::to_bytes(response.into_body(), BODY_LIMIT)
            .await
            .unwrap()
    }

    /// Asserts that the body is text starting with `@`, as a BibTeX entry does.
    async fn assert_bibtex_body(response: Response) {
        let body = body_of(response).await;
        let text = std::str::from_utf8(&body).unwrap();
        assert!(text.starts_with('@'), "bibtex should start with '@': {text}");
    }

    #[tokio::test]
    async fn index_returns_200() {
        let response = send(app_no_db(), "/", None).await;
        assert_eq!(response.status(), StatusCode::OK);
    }

    #[tokio::test]
    async fn schema_json_returns_json() {
        let response = send(app_no_db(), "/commonmeta_v1.0.json", None).await;
        assert_eq!(response.status(), StatusCode::OK);
        let ct = content_type_of(&response);
        assert!(ct.contains("application/json"));
    }

    #[tokio::test]
    async fn non_doi_path_returns_404() {
        let response = send(app_no_db(), "/not-a-doi", None).await;
        assert_eq!(response.status(), StatusCode::NOT_FOUND);
    }

    #[tokio::test]
    async fn arbitrary_text_path_returns_404() {
        let response = send(app_no_db(), "/hello/world", None).await;
        assert_eq!(response.status(), StatusCode::NOT_FOUND);
    }

    #[tokio::test]
    async fn unsupported_accept_returns_406() {
        let response = send(app_no_db(), "/10.1234/test", Some("application/rdf+xml")).await;
        assert_eq!(response.status(), StatusCode::NOT_ACCEPTABLE);
    }

    #[tokio::test]
    async fn multiple_unsupported_accept_returns_406() {
        let response = send(
            app_no_db(),
            "/10.1234/test",
            Some("application/rdf+xml, image/png"),
        )
        .await;
        assert_eq!(response.status(), StatusCode::NOT_ACCEPTABLE);
    }

    #[tokio::test]
    async fn invalid_format_query_param_returns_406() {
        let response = send(app_no_db(), "/10.1234/test?format=nonsense", None).await;
        assert_eq!(response.status(), StatusCode::NOT_ACCEPTABLE);
    }

    // Despite the name, an HTML request is answered inline: 200 with the HTML entity page.
    #[tokio::test]
    async fn html_accept_with_cache_returns_redirect() {
        let (_dir, app) = app_with_cache_only();
        let response = send(app, "/10.5678/cached", Some("text/html")).await;
        assert_eq!(response.status(), StatusCode::OK);
        assert_content_type(&response, "text/html");
    }

    #[tokio::test]
    async fn bibtex_accept_with_cache_returns_200() {
        let (_dir, app) = app_with_cache_only();
        let response = send(app, "/10.5678/cached", Some("application/x-bibtex")).await;
        assert_eq!(response.status(), StatusCode::OK);
        assert_content_type(&response, "application/x-bibtex");
        assert_bibtex_body(response).await;
    }

    // Despite the name, an HTML request is answered inline: 200 with the HTML entity page.
    #[tokio::test]
    async fn html_accept_with_db_returns_redirect() {
        let (_dir, app) = app_with_db();
        let response = send(app, "/10.1234/test", Some("text/html")).await;
        assert_eq!(response.status(), StatusCode::OK);
        assert_content_type(&response, "text/html");
    }

    #[tokio::test]
    async fn bibtex_accept_with_db_returns_200() {
        let (_dir, app) = app_with_db();
        let response = send(app, "/10.1234/test", Some("application/x-bibtex")).await;
        assert_eq!(response.status(), StatusCode::OK);
        assert_content_type(&response, "application/x-bibtex");
        assert_bibtex_body(response).await;
    }

    #[tokio::test]
    async fn format_query_param_with_db_returns_bibtex() {
        let (_dir, app) = app_with_db();
        let response = send(app, "/10.1234/test?format=bibtex", None).await;
        assert_eq!(response.status(), StatusCode::OK);
        assert_content_type(&response, "application/x-bibtex");
    }

    #[tokio::test]
    async fn dois_route_returns_datacite_json() {
        let (_dir, app) = app_with_db();
        let response = send(app, "/dois/10.1234/test", None).await;
        assert_eq!(response.status(), StatusCode::OK);
        assert_content_type(&response, "datacite.datacite+json");
        let body = body_of(response).await;
        let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
        assert!(v.get("id").is_some() || v.get("doi").is_some(), "no id/doi field in datacite json");
    }

    #[tokio::test]
    async fn works_route_returns_crossref_json() {
        let (_dir, app) = app_with_db();
        let response = send(app, "/works/10.1234/test", None).await;
        assert_eq!(response.status(), StatusCode::OK);
        assert_content_type(&response, "vnd.crossref+json");
        let body = body_of(response).await;
        let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
        assert!(v["message"].get("DOI").is_some(), "no message.DOI field in crossref json: {v}");
    }

    #[tokio::test]
    async fn csl_json_accept_with_db_returns_200() {
        let (_dir, app) = app_with_db();
        let response = send(
            app,
            "/10.1234/test",
            Some("application/vnd.citationstyles.csl+json"),
        )
        .await;
        assert_eq!(response.status(), StatusCode::OK);
        assert_content_type(&response, "citationstyles.csl+json");
    }
}