use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use axum::{
Json, Router,
extract::{Path, Query, State},
http::{HeaderMap, StatusCode, header},
response::{IntoResponse, Redirect, Response},
routing::{get, post},
};
use clap::{Args, Parser, Subcommand};
use serde::{Deserialize, Serialize};
use std::net::SocketAddr;
use tokio::signal::unix::{SignalKind, signal};
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
mod db;
mod error;
mod negotiate;
use error::AppError;
use negotiate::NegotiateResult;
// Top-level command-line interface; `main` obtains it via `Cli::parse()`.
// (Plain `//` comments are used so the derive-generated help text is unchanged.)
#[derive(Parser)]
#[command(name = "dragoman", about = "PID redirection and content negotiation server")]
struct Cli {
// The subcommand chosen on the command line (see `Command`).
#[command(subcommand)]
command: Command,
}
// Subcommands of the binary. `main` dispatches `Start` to `cmd_start` and
// `Stop` to `cmd_stop`.
#[derive(Subcommand)]
enum Command {
// Run the HTTP server.
Start(StartArgs),
// Signal a previously started server, located through its PID file.
Stop(StopArgs),
}
// Arguments of the `Start` subcommand. Every option can also be supplied
// through the environment variable named in its `env = ...` attribute.
#[derive(Args)]
struct StartArgs {
// TCP port the server binds to (on all IPv4 interfaces, see `cmd_start`).
#[arg(short, long, env = "PORT", default_value_t = 3456)]
port: u16,
// Path of the local commonmeta SQLite database. When absent, `cmd_start`
// falls back to `platform_default_db_path()`.
#[arg(short, long, env = "COMMONMETA_DB")]
db: Option<PathBuf>,
// Path of the metadata cache database. When absent, `cmd_start` falls back
// to `platform_default_cache_path()`.
#[arg(long, env = "DRAGOMAN_CACHE_DB")]
cache_db: Option<PathBuf>,
// Optional file the server writes its process id to; no PID file is written
// when this is not set.
#[arg(long, env = "DRAGOMAN_PID_FILE")]
pid_file: Option<PathBuf>,
}
// Arguments of the `Stop` subcommand.
#[derive(Args)]
struct StopArgs {
// File to read the server's process id from. Note that, unlike
// `StartArgs::pid_file`, this option has a default value.
#[arg(long, env = "DRAGOMAN_PID_FILE", default_value = "/tmp/dragoman.pid")]
pid_file: PathBuf,
}
#[tokio::main]
async fn main() {
tracing_subscriber::registry()
.with(
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| "dragoman=info".into()),
)
.with(tracing_subscriber::fmt::layer())
.init();
let cli = Cli::parse();
match cli.command {
Command::Start(args) => cmd_start(args).await,
Command::Stop(args) => cmd_stop(args),
}
}
/// Returns the default location of the commonmeta SQLite database for the
/// platform this binary was compiled for.
///
/// * macOS: `$HOME/Library/Application Support/commonmeta/commonmeta.sqlite3`
///   (an unset `HOME` is treated as the empty string).
/// * Linux: `/var/lib/commonmeta/commonmeta.sqlite3`.
/// * Anything else: `commonmeta.sqlite3`, relative to the working directory.
fn platform_default_db_path() -> PathBuf {
    // Exactly one of the three bindings below survives conditional compilation.
    #[cfg(target_os = "macos")]
    let default_path = {
        let home = std::env::var("HOME").unwrap_or_default();
        PathBuf::from(format!(
            "{}/Library/Application Support/commonmeta/commonmeta.sqlite3",
            home
        ))
    };
    #[cfg(target_os = "linux")]
    let default_path = PathBuf::from("/var/lib/commonmeta/commonmeta.sqlite3");
    #[cfg(not(any(target_os = "macos", target_os = "linux")))]
    let default_path = PathBuf::from("commonmeta.sqlite3");

    default_path
}
/// Returns the default location of the metadata cache database: a file named
/// `cache.sqlite3` next to the default commonmeta database.
fn platform_default_cache_path() -> PathBuf {
    let mut cache_path = platform_default_db_path();
    cache_path.set_file_name("cache.sqlite3");
    cache_path
}
/// Runs the `Start` subcommand: opens the databases, optionally writes a PID
/// file, binds the listener and serves requests until Ctrl-C or SIGTERM.
///
/// * The main database path comes from `args.db` or
///   `platform_default_db_path()`. A failure to open it is fatal; `Ok(None)`
///   from `db::open` means the server runs without a local database.
/// * The cache database path comes from `args.cache_db` or
///   `platform_default_cache_path()`. A failure to open it only disables
///   caching.
/// * When `args.pid_file` is set, the process id is written there and the file
///   is removed again on every exit path of this function after the write:
///   bind failure, signal-handler registration failure, graceful shutdown and
///   server error.
///
/// Fatal errors are logged and terminate the process with exit status 1.
///
/// # Panics
///
/// Panics if one of the blocking database-open tasks itself panics.
async fn cmd_start(args: StartArgs) {
    let db_path = args.db.unwrap_or_else(platform_default_db_path);
    // Opening the database is blocking work, so keep it off the async executor.
    let database = match tokio::task::spawn_blocking(move || db::open(&db_path))
        .await
        .expect("spawn_blocking panicked")
    {
        Ok(Some(p)) => {
            tracing::info!(path = %p.display(), "using local sqlite database");
            Some(Arc::new(p))
        }
        Ok(None) => None,
        Err(e) => {
            tracing::error!(error = %e, "failed to open database");
            std::process::exit(1);
        }
    };

    let cache_path = args.cache_db.unwrap_or_else(platform_default_cache_path);
    let cache = {
        // The original path is still needed for the warning below.
        let path = cache_path.clone();
        match tokio::task::spawn_blocking(move || db::cache_open(&path))
            .await
            .expect("spawn_blocking panicked")
        {
            Ok(p) => {
                tracing::info!(path = %p.display(), "using metadata cache database");
                Some(Arc::new(p))
            }
            Err(e) => {
                tracing::warn!(path = %cache_path.display(), error = %e, "cache database unavailable, caching disabled");
                None
            }
        }
    };

    let pid_file = args.pid_file;
    if let Some(path) = &pid_file {
        if let Err(e) = std::fs::write(path, std::process::id().to_string()) {
            tracing::error!(path = %path.display(), error = %e, "failed to write PID file");
            std::process::exit(1);
        }
        tracing::info!(path = %path.display(), "wrote PID file");
    }

    let app = build_app(AppState { db: database, cache });
    let addr = SocketAddr::from(([0, 0, 0, 0], args.port));
    let listener = match tokio::net::TcpListener::bind(addr).await {
        Ok(l) => l,
        Err(e) => {
            tracing::error!(port = args.port, error = %e, "failed to bind");
            if let Some(path) = &pid_file {
                std::fs::remove_file(path).ok();
            }
            std::process::exit(1);
        }
    };
    tracing::info!("listening on {addr}");

    // The shutdown future needs its own copy; the original is kept for the
    // error path after `axum::serve` returns.
    let shutdown_pid_file = pid_file.clone();
    let shutdown = async move {
        let mut sigterm = match signal(SignalKind::terminate()) {
            Ok(s) => s,
            Err(e) => {
                tracing::error!(error = %e, "failed to register SIGTERM handler");
                // Do not leave a stale PID file behind when bailing out.
                if let Some(path) = &shutdown_pid_file {
                    std::fs::remove_file(path).ok();
                }
                std::process::exit(1);
            }
        };
        tokio::select! {
            _ = tokio::signal::ctrl_c() => {}
            _ = sigterm.recv() => {}
        }
        if let Some(path) = &shutdown_pid_file {
            // Best effort: `cmd_stop` may already have removed the file.
            std::fs::remove_file(path).ok();
            tracing::info!(path = %path.display(), "removed PID file");
        }
    };

    if let Err(e) = axum::serve(listener, app)
        .with_graceful_shutdown(shutdown)
        .await
    {
        tracing::error!(error = %e, "server error");
        // The shutdown future may never have run, so clean up here as well.
        if let Some(path) = &pid_file {
            std::fs::remove_file(path).ok();
        }
        std::process::exit(1);
    }
}
/// Runs the `Stop` subcommand: reads a process id from `args.pid_file` and
/// invokes the external `kill` program on it (reported to the user as
/// SIGTERM). On success the PID file is removed.
///
/// Every failure (unreadable file, invalid PID, `kill` failing or not being
/// runnable) prints a message to stderr and exits with status 1.
fn cmd_stop(args: StopArgs) {
    let pid_file = &args.pid_file;
    let pid_str = match std::fs::read_to_string(pid_file) {
        Ok(s) => s,
        Err(e) => {
            eprintln!(
                "error: cannot read PID file '{}': {e}",
                pid_file.display()
            );
            std::process::exit(1);
        }
    };
    let pid = match pid_str.trim().parse::<u32>() {
        // PID 0 is never a real process: `kill 0` addresses every process in
        // the caller's own process group, so it must be rejected here.
        Ok(p) if p != 0 => p,
        _ => {
            eprintln!(
                "error: PID file '{}' does not contain a valid PID",
                pid_file.display()
            );
            std::process::exit(1);
        }
    };
    let status = std::process::Command::new("kill")
        .arg(pid.to_string())
        .status();
    match status {
        Ok(s) if s.success() => {
            println!("sent SIGTERM to process {pid}");
            // Best effort: the server removes the file itself on shutdown.
            std::fs::remove_file(pid_file).ok();
        }
        Ok(_) => {
            eprintln!("error: kill({pid}) failed — process may have already exited");
            std::process::exit(1);
        }
        Err(e) => {
            eprintln!("error: failed to run kill: {e}");
            std::process::exit(1);
        }
    }
}
/// Shared state handed to every request handler through axum's `State`
/// extractor. Both fields hold file-system paths, not open connections.
#[derive(Clone)]
struct AppState {
/// Path of the local commonmeta SQLite database; `None` when `db::open`
/// reported no database at start-up.
db: Option<Arc<PathBuf>>,
/// Path of the metadata cache database; `None` when caching is disabled.
cache: Option<Arc<PathBuf>>,
}
/// Query-string parameters accepted by the identifier endpoints. All of them
/// are optional; individual handlers read only a subset.
#[derive(Deserialize)]
struct PidQuery {
/// Explicit output format name; when present, handlers use it instead of
/// negotiating on the `Accept` header.
format: Option<String>,
/// Explicit metadata source, passed on to `resolve_source`.
source: Option<String>,
/// Style forwarded to `commonmeta::write_with_style`.
style: Option<String>,
/// Locale forwarded to `commonmeta::write_with_style`.
locale: Option<String>,
/// When `true`, `handle_pid` asks `fetch_and_convert` to answer only from the
/// local database or cache and to return "not found" instead of fetching
/// remotely.
local_only: Option<bool>,
}
/// Serves the bundled single-page UI (`ui/dist/index.html`, embedded at
/// compile time) as UTF-8 HTML.
async fn index() -> impl IntoResponse {
    let response_headers = [(header::CONTENT_TYPE, "text/html; charset=utf-8")];
    let page = include_str!("../ui/dist/index.html");
    (response_headers, page)
}
/// Catch-all handler for `/{*path}`: resolves a ROR ID, an ORCID iD or a DOI.
///
/// The `Accept` header defaults to `text/html` when missing or not valid
/// text. An explicit `?format=` always takes precedence over negotiation.
///
/// * ROR / ORCID paths: when HTML is wanted the entity page is rendered;
///   otherwise the request is redirected to `/ror/{path}` or `/orcid/{path}`.
/// * Everything else must validate as a DOI, otherwise `AppError::NotFound`
///   is returned. The DOI is rendered in the requested or negotiated format;
///   an HTML negotiation result renders the entity page from commonmeta JSON.
///
/// # Errors
///
/// `AppError::UnsupportedFormat` for an unknown `format` or an unacceptable
/// `Accept` header, plus whatever the fetch and conversion helpers return.
async fn handle_pid(
    State(state): State<AppState>,
    Path(path): Path<String>,
    Query(query): Query<PidQuery>,
    headers: HeaderMap,
) -> Result<Response, AppError> {
    let accept = match headers.get(header::ACCEPT).map(|value| value.to_str()) {
        Some(Ok(value)) => value,
        _ => "text/html",
    };
    // HTML is only wanted when no explicit format was requested.
    let wants_html = if query.format.is_some() {
        false
    } else {
        matches!(negotiate::negotiate(accept), NegotiateResult::Redirect)
    };

    if is_ror_id(&path) {
        if !wants_html {
            return Ok(Redirect::temporary(&format!("/ror/{path}")).into_response());
        }
        let ror_url = format!("https://ror.org/{path}");
        let local_db = state.db.as_deref().cloned();
        let local_only = !is_localhost(&headers);
        let (org_data, org_types, works, rel_names) =
            fetch_ror_data(ror_url, local_db, local_only).await?;
        let db_path = state.db.as_deref().map(|p| p.as_path());
        let bytes = ror_to_commonmeta_json(&org_data, &org_types, &works, db_path, &rel_names)?;
        return entity_page_html(bytes);
    }

    if is_orcid_id(&path) {
        if !wants_html {
            return Ok(Redirect::temporary(&format!("/orcid/{path}")).into_response());
        }
        let orcid_url = format!("https://orcid.org/{path}");
        let local_db = state.db.as_deref().cloned();
        let local_only = !is_localhost(&headers);
        let (person_data, works) = fetch_orcid_data(orcid_url, local_db, local_only).await?;
        let bytes = orcid_to_commonmeta_json(&person_data, &works)?;
        return entity_page_html(bytes);
    }

    let doi = match commonmeta::doi_utils::validate_doi(&path) {
        Some(valid) => valid,
        None => return Err(AppError::NotFound(path.clone())),
    };
    tracing::info!(doi = %doi, "resolving PID");

    let (format, content_type, style, locale): (
        String,
        &'static str,
        Option<String>,
        Option<String>,
    ) = match query.format.as_ref() {
        Some(requested) => {
            let ct = negotiate::content_type_for_format(requested)
                .ok_or_else(|| AppError::UnsupportedFormat(requested.clone()))?;
            (requested.clone(), ct, query.style.clone(), query.locale.clone())
        }
        None => match negotiate::negotiate(accept) {
            NegotiateResult::Format(negotiated) => (
                negotiated.format.to_string(),
                negotiated.content_type,
                // Values carried by the Accept header win over query parameters.
                negotiated.style.or_else(|| query.style.clone()),
                negotiated.locale.or_else(|| query.locale.clone()),
            ),
            NegotiateResult::Redirect => {
                // Browser request: embed the commonmeta record in the UI page.
                let bytes = fetch_and_convert(
                    &doi,
                    "commonmeta",
                    query.source.as_deref(),
                    None,
                    None,
                    state.db.as_deref(),
                    state.cache.as_deref(),
                    false,
                )
                .await?;
                return entity_page_html(bytes);
            }
            NegotiateResult::NotAcceptable => {
                return Err(AppError::UnsupportedFormat(accept.to_string()));
            }
        },
    };

    let local_only = query.local_only.unwrap_or(false);
    let bytes = fetch_and_convert(
        &doi,
        &format,
        query.source.as_deref(),
        style.as_deref(),
        locale.as_deref(),
        state.db.as_deref(),
        state.cache.as_deref(),
        local_only,
    )
    .await?;
    Ok((StatusCode::OK, [(header::CONTENT_TYPE, content_type)], bytes).into_response())
}
async fn fetch_and_convert(
doi: &str,
format: &str,
source: Option<&str>,
style: Option<&str>,
locale: Option<&str>,
db: Option<&PathBuf>,
cache: Option<&PathBuf>,
local_only: bool,
) -> Result<Vec<u8>, AppError> {
let doi = doi.to_string();
let format = format.to_string();
let source = source.map(str::to_string);
let style = style.map(str::to_string);
let locale = locale.map(str::to_string);
let db = db.cloned();
let cache = cache.cloned();
tokio::task::spawn_blocking(move || {
if let Some(ref path) = db {
if let Some(data) = db::lookup(path, &doi)? {
tracing::debug!(doi = %doi, "served from local sqlite");
let data = enrich_citations(data, &db, &doi);
return commonmeta::write_with_style(
&format,
&data,
style.as_deref(),
locale.as_deref(),
)
.map_err(|e| AppError::FetchError(e.to_string()));
}
}
if let Some(ref path) = cache {
if let Some(data) = db::cache_lookup(path, &doi)? {
tracing::debug!(doi = %doi, "served from cache sqlite");
let data = enrich_citations(data, &db, &doi);
return commonmeta::write_with_style(
&format,
&data,
style.as_deref(),
locale.as_deref(),
)
.map_err(|e| AppError::FetchError(e.to_string()));
}
}
if local_only {
return Err(AppError::NotFound(doi));
}
let from = resolve_source(&doi, source.as_deref());
let data = commonmeta::read(&from, &doi)
.map_err(|e| AppError::FetchError(e.to_string()))?;
if let Some(ref path) = cache {
if let Err(e) = db::cache_insert(path, &data, &from) {
tracing::warn!(doi = %doi, error = %e, "failed to cache fetched metadata");
} else {
tracing::debug!(doi = %doi, "cached fetched metadata");
}
}
let data = enrich_citations(data, &db, &doi);
commonmeta::write_with_style(&format, &data, style.as_deref(), locale.as_deref())
.map_err(|e| AppError::FetchError(e.to_string()))
})
.await
.map_err(|e| AppError::Internal(e.to_string()))?
}
/// Replaces `data.citations` with the citing DOIs recorded in the local
/// database, when a database is configured and the lookup returns a non-empty
/// list. Lookup errors and empty results leave `data` untouched.
fn enrich_citations(mut data: commonmeta::Data, db: &Option<PathBuf>, doi: &str) -> commonmeta::Data {
    let Some(path) = db else {
        return data;
    };
    match db::lookup_citing_dois(path, doi) {
        Ok(citing) if !citing.is_empty() => data.citations = citing,
        // Best effort: errors and empty lists keep the existing citations.
        _ => {}
    }
    data
}
/// JSON body accepted by `handle_bibliography`.
#[derive(Deserialize)]
struct BibliographyRequest {
/// Records to format, processed in order.
items: Vec<BibRequestItem>,
/// Optional style forwarded to `commonmeta::write_with_style`.
style: Option<String>,
/// Optional locale forwarded to `commonmeta::write_with_style`.
locale: Option<String>,
}
/// One record of a bibliography request.
#[derive(Deserialize)]
struct BibRequestItem {
/// Caller-chosen identifier, echoed back unchanged in the response item.
id: String,
/// The record as a JSON string; `handle_bibliography` parses it with
/// `commonmeta::read("commonmeta", ..)`.
data: String, }
/// JSON body returned by `handle_bibliography`.
#[derive(Serialize)]
struct BibliographyResponse {
/// One entry per request item, in request order.
items: Vec<BibResponseItem>,
}
/// One formatted entry of a bibliography response.
#[derive(Serialize)]
struct BibResponseItem {
/// The `id` of the corresponding request item.
id: String,
/// Output of the `"citation"` writer, decoded lossily as UTF-8; the empty
/// string for ROR and ORCID records, which are not formatted.
html: String,
}
async fn handle_bibliography(
Json(req): Json<BibliographyRequest>,
) -> Result<Json<BibliographyResponse>, AppError> {
let style = req.style;
let locale = req.locale;
let items = req.items;
let results = tokio::task::spawn_blocking(move || {
items
.into_iter()
.map(|item| {
let first_id = serde_json::from_str::<serde_json::Value>(&item.data)
.ok()
.and_then(|v| {
v.as_array()?
.first()?
.get("id")?
.as_str()
.map(String::from)
})
.unwrap_or_default();
if first_id.starts_with("https://ror.org/")
|| first_id.starts_with("https://orcid.org/")
{
return Ok(BibResponseItem { id: item.id, html: String::new() });
}
let data = commonmeta::read("commonmeta", &item.data)
.map_err(|e| AppError::FetchError(e.to_string()))?;
let bytes = commonmeta::write_with_style(
"citation",
&data,
style.as_deref(),
locale.as_deref(),
)
.map_err(|e| AppError::FetchError(e.to_string()))?;
Ok(BibResponseItem {
id: item.id,
html: String::from_utf8_lossy(&bytes).into_owned(),
})
})
.collect::<Result<Vec<_>, AppError>>()
})
.await
.map_err(|e| AppError::Internal(e.to_string()))??;
Ok(Json(BibliographyResponse { items: results }))
}
/// `GET /pmid/{pmid}`: resolves a PubMed ID, preferring the local database
/// and falling back to the `"pubmed"` reader, then hands the record to
/// `format_or_redirect` together with the optional `?format=`.
async fn handle_pmid(
    State(state): State<AppState>,
    Path(pmid): Path<String>,
    Query(query): Query<PidQuery>,
) -> Result<Response, AppError> {
    let requested_format = query.format;
    let local_db = state.db.as_deref().cloned();

    let lookup = tokio::task::spawn_blocking(move || -> Result<commonmeta::Data, AppError> {
        let local_hit = match local_db.as_ref() {
            Some(path) => db::lookup_by_pmid(path, &pmid)?,
            None => None,
        };
        match local_hit {
            Some(record) => Ok(record),
            None => {
                tracing::debug!(pmid = %pmid, "pmid fetching from europepmc");
                commonmeta::read("pubmed", &pmid)
                    .map_err(|e| AppError::FetchError(e.to_string()))
            }
        }
    });
    let data = lookup
        .await
        .map_err(|e| AppError::Internal(e.to_string()))??;

    format_or_redirect(data, requested_format.as_deref())
}
/// `GET /pmcid/{pmcid}`: resolves a PubMed Central ID, preferring the local
/// database and falling back to the `"pubmed"` reader, then hands the record
/// to `format_or_redirect` together with the optional `?format=`.
async fn handle_pmcid(
    State(state): State<AppState>,
    Path(pmcid): Path<String>,
    Query(query): Query<PidQuery>,
) -> Result<Response, AppError> {
    let requested_format = query.format;
    let local_db = state.db.as_deref().cloned();

    let lookup = tokio::task::spawn_blocking(move || -> Result<commonmeta::Data, AppError> {
        let local_hit = match local_db.as_ref() {
            Some(path) => db::lookup_by_pmcid(path, &pmcid)?,
            None => None,
        };
        match local_hit {
            Some(record) => Ok(record),
            None => {
                tracing::debug!(pmcid = %pmcid, "pmcid fetching from europepmc");
                commonmeta::read("pubmed", &pmcid)
                    .map_err(|e| AppError::FetchError(e.to_string()))
            }
        }
    });
    let data = lookup
        .await
        .map_err(|e| AppError::Internal(e.to_string()))??;

    format_or_redirect(data, requested_format.as_deref())
}
/// `GET /arxiv/{id}`: resolves an arXiv identifier, preferring the local
/// database. On a miss the identifier is turned into the DOI
/// `10.48550/arXiv.{id}` and read through the `"datacite"` reader. The record
/// is then handed to `format_or_redirect` with the optional `?format=`.
async fn handle_arxiv(
    State(state): State<AppState>,
    Path(arxiv_id): Path<String>,
    Query(query): Query<PidQuery>,
) -> Result<Response, AppError> {
    let requested_format = query.format;
    let local_db = state.db.as_deref().cloned();

    let lookup = tokio::task::spawn_blocking(move || -> Result<commonmeta::Data, AppError> {
        let local_hit = match local_db.as_ref() {
            Some(path) => db::lookup_by_arxiv(path, &arxiv_id)?,
            None => None,
        };
        if let Some(record) = local_hit {
            return Ok(record);
        }
        let doi = format!("10.48550/arXiv.{arxiv_id}");
        tracing::debug!(arxiv = %arxiv_id, doi = %doi, "arxiv fetching from datacite");
        commonmeta::read("datacite", &doi).map_err(|e| AppError::FetchError(e.to_string()))
    });
    let data = lookup
        .await
        .map_err(|e| AppError::Internal(e.to_string()))??;

    format_or_redirect(data, requested_format.as_deref())
}
/// Turns a resolved record into a response.
///
/// With a format, the record is written in that format (no style or locale)
/// and returned with the matching content type. Without one, the client is
/// redirected to `/{suffix}`, where `suffix` is `data.id` minus its
/// `https://doi.org/`, `https://ror.org/` or `https://orcid.org/` prefix.
///
/// # Errors
///
/// `AppError::UnsupportedFormat` for an unknown format,
/// `AppError::FetchError` when writing fails, and `AppError::NotFound` when
/// no format was given and `data.id` has none of the known prefixes.
fn format_or_redirect(data: commonmeta::Data, fmt: Option<&str>) -> Result<Response, AppError> {
    // Checked in this order; the first matching prefix wins.
    const ID_PREFIXES: [&str; 3] = [
        "https://doi.org/",
        "https://ror.org/",
        "https://orcid.org/",
    ];

    if let Some(format) = fmt {
        let ct = negotiate::content_type_for_format(format)
            .ok_or_else(|| AppError::UnsupportedFormat(format.to_string()))?;
        let bytes = commonmeta::write_with_style(format, &data, None, None)
            .map_err(|e| AppError::FetchError(e.to_string()))?;
        return Ok((StatusCode::OK, [(header::CONTENT_TYPE, ct)], bytes).into_response());
    }

    for prefix in ID_PREFIXES.iter() {
        if let Some(suffix) = data.id.strip_prefix(*prefix) {
            return Ok(Redirect::temporary(&format!("/{suffix}")).into_response());
        }
    }
    Err(AppError::NotFound(data.id))
}
async fn handle_openalex(
State(state): State<AppState>,
Path(id): Path<String>,
Query(query): Query<PidQuery>,
) -> Result<Response, AppError> {
let format = query.format.as_deref().unwrap_or("commonmeta").to_string();
let content_type = negotiate::content_type_for_format(&format)
.ok_or_else(|| AppError::UnsupportedFormat(format.clone()))?;
let db = state.db.as_deref().cloned();
let cache = state.cache.as_deref().cloned();
let id2 = id.clone();
let fmt2 = format.clone();
let bytes = tokio::task::spawn_blocking(move || {
if let Some(ref path) = db {
if let Some(data) = db::lookup_by_openalex(path, &id2)? {
tracing::debug!(id = %id2, "openalex served from local sqlite");
return commonmeta::write_with_style(&fmt2, &data, None, None)
.map_err(|e| AppError::FetchError(e.to_string()));
}
}
tracing::debug!(id = %id2, "openalex fetching from remote");
let data = commonmeta::read("openalex", &id2)
.map_err(|e| AppError::FetchError(e.to_string()))?;
if let Some(ref path) = cache {
if let Err(e) = db::cache_insert(path, &data, "openalex") {
tracing::warn!(id = %id2, error = %e, "failed to cache openalex metadata");
}
}
commonmeta::write_with_style(&fmt2, &data, None, None)
.map_err(|e| AppError::FetchError(e.to_string()))
})
.await
.map_err(|e| AppError::Internal(e.to_string()))??;
Ok((StatusCode::OK, [(header::CONTENT_TYPE, content_type)], bytes).into_response())
}
/// Serialises an organization record plus its works as a pretty-printed JSON
/// array: the first element is an organization object assembled field by
/// field from `data`, followed by one element per entry of `works` (each
/// passed through `commonmeta::prepare_commonmeta`).
///
/// * `org_types` — when non-empty, used for the `types` key instead of
///   `data.types`.
/// * `db` — when present, used for geonames lookups that add `city`,
///   `country_code`, `region_code` and `region_name` to the location.
/// * `rel_names` — maps a relation id to a display name added to that relation.
///
/// Empty strings and empty collections are omitted from the organization
/// object rather than emitted as empty values.
///
/// # Errors
///
/// Returns `AppError::FetchError` when a work or the final array cannot be
/// serialised.
fn ror_to_commonmeta_json(
data: &commonmeta::Data,
org_types: &[String],
works: &[commonmeta::Data],
db: Option<&std::path::Path>,
rel_names: &HashMap<String, String>,
) -> Result<Vec<u8>, AppError> {
use serde_json::{Map, Value};
// Lower-cased identifier type -> label emitted as `identifier_type`.
const ID_TYPE_MAP: &[(&str, &str)] = &[
("crossref funder id", "FundRef"),
("fundref", "FundRef"),
("grid", "GRID"),
("wikidata", "Wikidata"),
("isni", "ISNI"),
];
// Case-insensitive lookup in ID_TYPE_MAP; unknown types become "Other".
let normalize_id_type = |raw: &str| -> &'static str {
let lower = raw.to_lowercase();
ID_TYPE_MAP
.iter()
.find(|(k, _)| *k == lower.as_str())
.map(|(_, v)| *v)
.unwrap_or("Other")
};
let mut obj = Map::new();
obj.insert("id".to_string(), Value::String(data.id.clone()));
// Prefer `name`; fall back to `title` when `name` is empty.
let display_name = if !data.name.is_empty() { &data.name } else { &data.title };
if !display_name.is_empty() {
obj.insert("name".to_string(), Value::String(display_name.to_string()));
}
if !data.acronym.is_empty() {
obj.insert("acronym".to_string(), Value::String(data.acronym.clone()));
}
if !data.additional_names.is_empty() {
obj.insert("additional_names".to_string(), serde_json::json!(data.additional_names));
}
if !data.status.is_empty() {
obj.insert("status".to_string(), Value::String(data.status.clone()));
}
if let Some(year) = data.established {
obj.insert("established".to_string(), Value::Number(year.into()));
}
if !data.date_updated.is_empty() {
obj.insert("date_updated".to_string(), Value::String(data.date_updated.clone()));
}
if !data.country.is_empty() {
obj.insert("country".to_string(), Value::String(data.country.clone()));
}
// Only the first geo location is used, and only if it has an id or a place.
if let Some(geo) = data.geo_locations.first() {
if !geo.place.is_empty() || !geo.id.is_empty() {
let mut loc = Map::new();
if !geo.id.is_empty() { loc.insert("id".to_string(), Value::String(geo.id.clone())); }
if !geo.place.is_empty() { loc.insert("place".to_string(), Value::String(geo.place.clone())); }
if let Some(db_path) = db {
// The numeric geonames id is the part of the URL after the site prefix.
if let Some(geonames_id) = geo.id
.strip_prefix("https://www.geonames.org/")
.and_then(|s| s.parse::<i64>().ok())
{
// Geonames enrichment is best effort: lookup errors are ignored.
if let Ok(gn) = commonmeta::geonames::fetch_geoname_raw(geonames_id, db_path) {
loc.insert("city".to_string(), Value::String(gn.name.clone()));
if !gn.country_code.is_empty() {
loc.insert("country_code".to_string(), Value::String(gn.country_code.clone()));
}
if !gn.admin1_code.is_empty() {
// Region code is "<country_code>-<admin1_code>".
let region_code = format!("{}-{}", gn.country_code, gn.admin1_code);
loc.insert("region_code".to_string(), Value::String(region_code));
if let Ok(Some(name)) = commonmeta::geonames::lookup_admin1(
&gn.country_code, &gn.admin1_code, db_path,
) {
loc.insert("region_name".to_string(), Value::String(name));
}
}
}
}
}
obj.insert("location".to_string(), Value::Object(loc));
}
}
// Prefer the structured `urls` list (skipping entries without a URL); fall
// back to the single `url` field.
let urls_arr: Vec<Value> = if !data.urls.is_empty() {
data.urls.iter().filter(|u| !u.url.is_empty()).map(|u| {
let mut m = Map::new();
if !u.name.is_empty() { m.insert("name".to_string(), Value::String(u.name.clone())); }
m.insert("url".to_string(), Value::String(u.url.clone()));
Value::Object(m)
}).collect()
} else if !data.url.is_empty() {
vec![serde_json::json!({"url": data.url})]
} else {
vec![]
};
if !urls_arr.is_empty() {
obj.insert("urls".to_string(), Value::Array(urls_arr));
}
// Identifiers with an empty value are dropped; types are normalised.
let ids: Vec<Value> = data
.identifiers
.iter()
.filter(|id| !id.identifier.is_empty())
.map(|id| {
let mut m = Map::new();
m.insert("identifier".to_string(), Value::String(id.identifier.clone()));
m.insert("identifier_type".to_string(), Value::String(normalize_id_type(&id.identifier_type).to_string()));
Value::Object(m)
})
.collect();
if !ids.is_empty() {
obj.insert("identifiers".to_string(), Value::Array(ids));
}
// Relations without an id are dropped; a name is added when `rel_names` has one.
let rels: Vec<Value> = data
.relations
.iter()
.filter(|r| !r.id.is_empty())
.map(|r| {
let mut m = Map::new();
m.insert("id".to_string(), Value::String(r.id.clone()));
m.insert("type".to_string(), Value::String(r.type_.clone()));
if let Some(name) = rel_names.get(&r.id) {
m.insert("name".to_string(), Value::String(name.clone()));
}
Value::Object(m)
})
.collect();
if !rels.is_empty() {
obj.insert("relations".to_string(), Value::Array(rels));
}
// Caller-supplied types take precedence over the record's own.
let types: &[String] = if !org_types.is_empty() { org_types } else { &data.types };
if !types.is_empty() {
obj.insert(
"types".to_string(),
Value::Array(types.iter().map(|t| Value::String(t.clone())).collect()),
);
}
// Organization first, then every work in the order given.
let mut arr = vec![Value::Object(obj)];
for work in works {
let prepared = commonmeta::prepare_commonmeta(work);
let work_val = serde_json::to_value(&prepared)
.map_err(|e| AppError::FetchError(e.to_string()))?;
arr.push(work_val);
}
serde_json::to_vec_pretty(&Value::Array(arr))
.map_err(|e| AppError::FetchError(e.to_string()))
}
/// `GET /ror/{id}`: returns an organization record.
///
/// The organization data is always fetched first. With no `?format=` or with
/// `format=commonmeta`, the response is the JSON array built by
/// `ror_to_commonmeta_json`; any other format is delegated to
/// `format_or_redirect`. Remote work lookups are skipped unless the request's
/// `Host` header is recognised by `is_localhost`.
async fn handle_ror(
    State(state): State<AppState>,
    Path(id): Path<String>,
    Query(query): Query<PidQuery>,
    headers: HeaderMap,
) -> Result<Response, AppError> {
    let local_db = state.db.as_deref().cloned();
    let local_only = !is_localhost(&headers);
    let (org_data, org_types, works, rel_names) =
        fetch_ror_data(format!("https://ror.org/{id}"), local_db, local_only).await?;

    match query.format.as_deref() {
        None | Some("commonmeta") => {
            let db_path = state.db.as_deref().map(|p| p.as_path());
            let bytes =
                ror_to_commonmeta_json(&org_data, &org_types, &works, db_path, &rel_names)?;
            let response = (
                StatusCode::OK,
                [(header::CONTENT_TYPE, "application/json; charset=utf-8")],
                bytes,
            );
            Ok(response.into_response())
        }
        other_format => format_or_redirect(org_data, other_format),
    }
}
/// `GET /orcid/{id}`: returns a person record.
///
/// With no `?format=` or with `format=commonmeta`, the person and their works
/// are fetched and returned as the JSON array built by
/// `orcid_to_commonmeta_json`. Any other format fetches only the person
/// record and delegates to `format_or_redirect`.
async fn handle_orcid(
    State(state): State<AppState>,
    Path(id): Path<String>,
    Query(query): Query<PidQuery>,
    headers: HeaderMap,
) -> Result<Response, AppError> {
    let orcid_url = format!("https://orcid.org/{id}");
    let format = query
        .format
        .unwrap_or_else(|| "commonmeta".to_string());

    if format != "commonmeta" {
        // Non-commonmeta output only needs the person record itself.
        let person = tokio::task::spawn_blocking(move || {
            commonmeta::fetch_orcid(&orcid_url).map_err(|e| AppError::FetchError(e.to_string()))
        })
        .await
        .map_err(|e| AppError::Internal(e.to_string()))??;
        return format_or_redirect(person, Some(format.as_str()));
    }

    let local_db = state.db.as_deref().cloned();
    let local_only = !is_localhost(&headers);
    let (person_data, works) = fetch_orcid_data(orcid_url, local_db, local_only).await?;
    let bytes = orcid_to_commonmeta_json(&person_data, &works)?;
    let response = (
        StatusCode::OK,
        [(header::CONTENT_TYPE, "application/json; charset=utf-8")],
        bytes,
    );
    Ok(response.into_response())
}
/// `GET /dois/{*path}`: resolves the DOI in `path` and always answers in the
/// `datacite` format.
async fn handle_dois(
    State(state): State<AppState>,
    Path(path): Path<String>,
    Query(query): Query<PidQuery>,
) -> Result<Response, AppError> {
    const OUTPUT_FORMAT: &str = "datacite";
    let response = handle_pid_with_format(state, path, query, OUTPUT_FORMAT).await?;
    Ok(response)
}
/// `GET /works/{*path}`: resolves the DOI in `path` and always answers in the
/// `crossref` format.
async fn handle_works(
    State(state): State<AppState>,
    Path(path): Path<String>,
    Query(query): Query<PidQuery>,
) -> Result<Response, AppError> {
    const OUTPUT_FORMAT: &str = "crossref";
    let response = handle_pid_with_format(state, path, query, OUTPUT_FORMAT).await?;
    Ok(response)
}
/// Shared implementation of the fixed-format DOI endpoints: validates `path`
/// as a DOI, then fetches and converts it to `format`. Only `query.source` is
/// honoured; style, locale and `local_only` are not applied.
///
/// # Errors
///
/// `AppError::NotFound` when `path` is not a valid DOI,
/// `AppError::UnsupportedFormat` when `format` has no content type, plus any
/// error from `fetch_and_convert`.
async fn handle_pid_with_format(
    state: AppState,
    path: String,
    query: PidQuery,
    format: &'static str,
) -> Result<Response, AppError> {
    let doi = match commonmeta::doi_utils::validate_doi(&path) {
        Some(valid) => valid,
        None => return Err(AppError::NotFound(path.clone())),
    };
    let content_type = match negotiate::content_type_for_format(format) {
        Some(ct) => ct,
        None => return Err(AppError::UnsupportedFormat(format.to_string())),
    };
    tracing::info!(doi = %doi, format, "resolving PID");

    let source = query.source.as_deref();
    let bytes = fetch_and_convert(
        &doi,
        format,
        source,
        None,
        None,
        state.db.as_deref(),
        state.cache.as_deref(),
        false,
    )
    .await?;
    Ok((StatusCode::OK, [(header::CONTENT_TYPE, content_type)], bytes).into_response())
}
/// Builds the application router and attaches `state` to it.
///
/// Routes are registered in three groups: fixed endpoints, identifier-scheme
/// endpoints, and finally the `/{*path}` catch-all served by `handle_pid`.
fn build_app(state: AppState) -> Router {
    // Fixed endpoints.
    let router: Router<AppState> = Router::new()
        .route("/", get(index))
        .route("/commonmeta_v1.0.json", get(schema_json))
        .route("/status", get(handle_status))
        .route("/random", get(handle_random))
        .route("/bibliography", post(handle_bibliography));

    // Endpoints keyed by identifier scheme.
    let router = router
        .route("/pmid/{pmid}", get(handle_pmid))
        .route("/pmcid/{pmcid}", get(handle_pmcid))
        .route("/arxiv/{id}", get(handle_arxiv))
        .route("/openalex/{id}", get(handle_openalex))
        .route("/ror/{id}", get(handle_ror))
        .route("/orcid/{id}", get(handle_orcid))
        .route("/dois/{*path}", get(handle_dois))
        .route("/works/{*path}", get(handle_works));

    // Catch-all: DOIs plus bare ROR / ORCID identifiers.
    router
        .route("/{*path}", get(handle_pid))
        .with_state(state)
}
async fn schema_json() -> impl axum::response::IntoResponse {
(
[(axum::http::header::CONTENT_TYPE, "application/json")],
commonmeta::SCHEMA_JSON,
)
}
/// `GET /status`: reports as `{"db": <bool>}` whether a local database is
/// configured.
async fn handle_status(
    State(state): State<AppState>,
) -> Json<serde_json::Value> {
    let has_db = state.db.is_some();
    Json(serde_json::json!({ "db": has_db }))
}
/// `GET /random`: returns `{"id": ...}` for an id chosen by `db::random_id`.
///
/// # Errors
///
/// `AppError::NotFound` when no local database is configured or it yields no
/// record; `AppError::Internal` if the blocking task could not be joined;
/// database errors via `?`.
async fn handle_random(
    State(state): State<AppState>,
) -> Result<Json<serde_json::Value>, AppError> {
    let db = state
        .db
        .ok_or_else(|| AppError::NotFound("no local database".to_string()))?;
    let picked = tokio::task::spawn_blocking(move || db::random_id(&db))
        .await
        .map_err(|e| AppError::Internal(e.to_string()))??;
    picked
        .map(|id| Json(serde_json::json!({ "id": id })))
        .ok_or_else(|| AppError::NotFound("no records found".to_string()))
}
/// Returns `true` when `s` has the shape of a bare ROR identifier (the part
/// after `https://ror.org/`).
///
/// A ROR ID is nine characters: a leading `0`, six characters from the
/// lowercase Crockford base32 alphabet (digits and letters except `i`, `l`,
/// `o`, `u`), and a two-digit checksum. Only the shape is checked; the
/// checksum is not verified.
///
/// Checking the full pattern, rather than just "nine lowercase alphanumerics",
/// keeps ordinary nine-letter paths such as `something` from being routed to
/// the ROR handlers.
fn is_ror_id(s: &str) -> bool {
    let bytes = s.as_bytes();
    if bytes.len() != 9 {
        return false;
    }
    let is_base32 = |b: &u8| {
        b.is_ascii_digit()
            || (b.is_ascii_lowercase() && !matches!(*b, b'i' | b'l' | b'o' | b'u'))
    };
    bytes[0] == b'0'
        && bytes[1..7].iter().all(is_base32)
        && bytes[7..].iter().all(u8::is_ascii_digit)
}
/// Returns `true` when `s` has the shape of a bare ORCID iD: four
/// hyphen-separated groups of four ASCII digits, where the very last
/// character may instead be an uppercase `X`. The checksum is not verified.
fn is_orcid_id(s: &str) -> bool {
    let mut groups = s.split('-');
    // Exactly four groups: a fifth `next()` must come back empty-handed.
    let (Some(first), Some(second), Some(third), Some(last), None) = (
        groups.next(),
        groups.next(),
        groups.next(),
        groups.next(),
        groups.next(),
    ) else {
        return false;
    };

    let four_digits = |group: &str| group.len() == 4 && group.bytes().all(|b| b.is_ascii_digit());
    if ![first, second, third].iter().all(|group| four_digits(group)) {
        return false;
    }

    // Work on bytes so that multi-byte characters can never cause a bad slice.
    match last.as_bytes() {
        [d0, d1, d2, check] => {
            [d0, d1, d2].iter().all(|b| b.is_ascii_digit())
                && (check.is_ascii_digit() || *check == b'X')
        }
        _ => false,
    }
}
/// Escapes `s` for use inside HTML text or a double-quoted attribute value by
/// replacing `&`, `"`, `<` and `>` with their character references.
///
/// The string is processed in a single pass, so an `&` that belongs to a
/// freshly inserted entity is never escaped a second time.
fn html_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '"' => escaped.push_str("&quot;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Builds the Open Graph / Twitter `<meta>` tags for an entity page from its
/// JSON representation.
///
/// When the JSON is an array its first element is described; JSON that fails
/// to parse is treated as a default (null) value, so every field comes out
/// empty. The title is taken from `title`, else from `given_name` +
/// `family_name`, else from `name`. `og:type` is `profile` for ORCID ids,
/// `website` for ROR ids and `article` otherwise. `og:description` is only
/// emitted when a non-empty `description` exists. Title, id and description
/// are passed through `html_escape`.
fn build_og_tags(json_str: &str) -> String {
    let parsed: serde_json::Value = serde_json::from_str(json_str).unwrap_or_default();
    // Indexing a JSON value never panics; a missing entry reads as null.
    let obj = if parsed.is_array() { &parsed[0] } else { &parsed };

    let id = obj["id"].as_str().unwrap_or("");
    let has_person_name = obj["given_name"].is_string() || obj["family_name"].is_string();
    let title = match obj["title"].as_str() {
        Some(text) => text.to_owned(),
        None if has_person_name => {
            let given = obj["given_name"].as_str().unwrap_or("");
            let family = obj["family_name"].as_str().unwrap_or("");
            format!("{} {}", given, family).trim().to_owned()
        }
        None => obj["name"].as_str().unwrap_or("").to_owned(),
    };
    let description = obj["description"].as_str().unwrap_or("");

    let og_type = match id {
        _ if id.starts_with("https://orcid.org") => "profile",
        _ if id.starts_with("https://ror.org") => "website",
        _ => "article",
    };

    let mut lines = vec![
        format!("<meta property=\"og:title\" content=\"{}\">", html_escape(&title)),
        format!("<meta property=\"og:url\" content=\"{}\">", html_escape(id)),
        format!("<meta property=\"og:type\" content=\"{}\">", og_type),
    ];
    if !description.is_empty() {
        lines.push(format!(
            "<meta property=\"og:description\" content=\"{}\">",
            html_escape(description)
        ));
    }
    lines.push("<meta name=\"twitter:card\" content=\"summary\">".to_owned());
    lines.join("\n")
}
/// Renders the bundled UI page for one entity: the Open Graph tags and a
/// `<script>` that assigns the entity JSON to `window.__DRAGOMAN_INIT__` are
/// inserted immediately before the first `</head>`.
///
/// The JSON is escaped before being embedded. Metadata comes from external
/// sources, and a string containing `</script>` would otherwise end the
/// script element early and let the rest be parsed as markup. `<`, `>`, `&`,
/// U+2028 and U+2029 are replaced by their `\uXXXX` forms; in valid JSON these
/// characters only occur inside string literals, where the escape denotes the
/// same character, so the data seen by the page is unchanged.
///
/// # Errors
///
/// Returns `AppError::Internal` when `json_bytes` is not valid UTF-8.
fn entity_page_html(json_bytes: Vec<u8>) -> Result<Response, AppError> {
    let html = include_str!("../ui/dist/index.html");
    let json_str = String::from_utf8(json_bytes).map_err(|e| AppError::Internal(e.to_string()))?;
    let og_tags = build_og_tags(&json_str);

    let mut script_json = String::with_capacity(json_str.len());
    for ch in json_str.chars() {
        match ch {
            '<' => script_json.push_str("\\u003c"),
            '>' => script_json.push_str("\\u003e"),
            '&' => script_json.push_str("\\u0026"),
            '\u{2028}' => script_json.push_str("\\u2028"),
            '\u{2029}' => script_json.push_str("\\u2029"),
            other => script_json.push(other),
        }
    }

    // Single insertion point: tags first, then the init script, then `</head>`.
    let head_injection = format!(
        "{og_tags}\n<script>window.__DRAGOMAN_INIT__={script_json};</script>\n</head>"
    );
    let page = html.replacen("</head>", &head_injection, 1);
    Ok((
        StatusCode::OK,
        [(header::CONTENT_TYPE, "text/html; charset=utf-8")],
        page,
    )
        .into_response())
}
/// Serialises a person record plus their works as a pretty-printed JSON
/// array: the person comes first, followed by the works in the order given.
/// Every record is passed through `commonmeta::prepare_commonmeta` first.
///
/// # Errors
///
/// Returns `AppError::FetchError` when any record or the final array cannot
/// be serialised.
fn orcid_to_commonmeta_json(
    person_data: &commonmeta::Data,
    works: &[commonmeta::Data],
) -> Result<Vec<u8>, AppError> {
    use serde_json::Value;

    let mut entries: Vec<Value> = Vec::with_capacity(works.len() + 1);
    let person = serde_json::to_value(commonmeta::prepare_commonmeta(person_data))
        .map_err(|e| AppError::FetchError(e.to_string()))?;
    entries.push(person);

    for work in works {
        let prepared = commonmeta::prepare_commonmeta(work);
        let entry = serde_json::to_value(&prepared)
            .map_err(|e| AppError::FetchError(e.to_string()))?;
        entries.push(entry);
    }

    let document = Value::Array(entries);
    serde_json::to_vec_pretty(&document).map_err(|e| AppError::FetchError(e.to_string()))
}
/// Fetches a person record and up to 200 of their works on the blocking pool.
///
/// The person record always comes from `commonmeta::fetch_orcid`. Works are
/// read from the local database when `db` is set. Unless `local_only` is set
/// or 200 local works already exist, the first page (100 entries) of Crossref
/// and of DataCite results is appended. Failures of any works lookup are
/// treated as "no works". The combined list is de-duplicated by id (first
/// occurrence wins), sorted by `date_published` descending and cut to 200.
///
/// # Errors
///
/// `AppError::FetchError` when the person record cannot be fetched;
/// `AppError::Internal` if the blocking task could not be joined.
async fn fetch_orcid_data(
    orcid_url: String,
    db: Option<PathBuf>,
    local_only: bool,
) -> Result<(commonmeta::Data, Vec<commonmeta::Data>), AppError> {
    let task = tokio::task::spawn_blocking(move || {
        let person = commonmeta::fetch_orcid(&orcid_url)
            .map_err(|e| AppError::FetchError(e.to_string()))?;

        let mut works = match db.as_ref() {
            Some(path) => commonmeta::read_sqlite_by_orcid(&orcid_url, path).unwrap_or_default(),
            None => Vec::new(),
        };

        let needs_remote = !local_only && works.len() < 200;
        if needs_remote {
            let crossref =
                commonmeta::fetch_crossref_by_orcid(&orcid_url, 100, 1).unwrap_or_default();
            let datacite =
                commonmeta::fetch_datacite_by_orcid(&orcid_url, 100, 1).unwrap_or_default();
            // Local works stay in front, then Crossref, then DataCite.
            works.extend(crossref);
            works.extend(datacite);
        }

        let mut seen_ids = std::collections::HashSet::new();
        works.retain(|work| seen_ids.insert(work.id.clone()));
        works.sort_by(|left, right| right.date_published.cmp(&left.date_published));
        works.truncate(200);

        Ok((person, works))
    });
    task.await
        .map_err(|e| AppError::Internal(e.to_string()))?
}
/// Fetches an organization record, its types, up to 200 of its works and the
/// display names of related organizations, all on the blocking pool.
///
/// The organization comes from the local database when `db` is set and knows
/// the id; otherwise it is fetched with `commonmeta::fetch_ror`, in which case
/// the relation-name map is empty. Types and local works are read from the
/// database when available. Unless `local_only` is set or 200 local works
/// already exist, the first page (100 entries) of Crossref and of DataCite
/// results is appended. Failures of the types and works lookups are treated
/// as "nothing found". Works are de-duplicated by id (first occurrence wins),
/// sorted by `date_published` descending and cut to 200.
///
/// # Errors
///
/// Database errors from the organization lookup via `?`,
/// `AppError::FetchError` when the remote fetch fails and
/// `AppError::Internal` if the blocking task could not be joined.
async fn fetch_ror_data(
    ror_url: String,
    db: Option<PathBuf>,
    local_only: bool,
) -> Result<(commonmeta::Data, Vec<String>, Vec<commonmeta::Data>, HashMap<String, String>), AppError> {
    let task = tokio::task::spawn_blocking(move || {
        let local_hit = match db.as_ref() {
            Some(path) => db::lookup_by_ror(path, &ror_url)?,
            None => None,
        };
        let (data, rel_names) = match local_hit {
            Some(found) => {
                tracing::debug!(id = %ror_url, "ror served from local sqlite");
                found
            }
            None => {
                let fetched = commonmeta::fetch_ror(&ror_url)
                    .map_err(|e| AppError::FetchError(e.to_string()))?;
                (fetched, HashMap::new())
            }
        };

        let (org_types, mut works) = match db.as_ref() {
            Some(path) => (
                db::lookup_ror_types(path, &ror_url).unwrap_or_default(),
                commonmeta::read_sqlite_by_ror(&ror_url, path).unwrap_or_default(),
            ),
            None => (Vec::new(), Vec::new()),
        };

        let needs_remote = !local_only && works.len() < 200;
        if needs_remote {
            let crossref = commonmeta::fetch_crossref_by_ror(&ror_url, 100, 1).unwrap_or_default();
            let datacite = commonmeta::fetch_datacite_by_ror(&ror_url, 100, 1).unwrap_or_default();
            // Local works stay in front, then Crossref, then DataCite.
            works.extend(crossref);
            works.extend(datacite);
        }

        let mut seen_ids = std::collections::HashSet::new();
        works.retain(|work| seen_ids.insert(work.id.clone()));
        works.sort_by(|left, right| right.date_published.cmp(&left.date_published));
        works.truncate(200);

        Ok((data, org_types, works, rel_names))
    });
    task.await
        .map_err(|e| AppError::Internal(e.to_string()))?
}
/// Returns `true` when the request's `Host` header names the local machine:
/// `localhost` (case-insensitive) or a loopback IP address, with or without a
/// port. IPv6 literals are accepted in bracketed form, e.g. `[::1]:3456`.
///
/// The host is parsed and compared as a whole rather than by prefix, so names
/// such as `localhost.example.com` or `127.0.0.1.example.com` do not match.
/// A missing or non-text header yields `false`.
///
/// NOTE(review): the `Host` header is supplied by the client, so this is a
/// convenience check and not an access-control mechanism.
fn is_localhost(headers: &HeaderMap) -> bool {
    let Some(host) = headers.get(header::HOST).and_then(|v| v.to_str().ok()) else {
        return false;
    };

    // Separate the host name from an optional `:port` suffix.
    let hostname = if let Some(rest) = host.strip_prefix('[') {
        // Bracketed IPv6 literal: the address ends at the closing bracket.
        match rest.split_once(']') {
            Some((addr, _port)) => addr,
            None => return false,
        }
    } else {
        host.rsplit_once(':').map_or(host, |(name, _port)| name)
    };

    hostname.eq_ignore_ascii_case("localhost")
        || hostname
            .parse::<std::net::IpAddr>()
            .map_or(false, |ip| ip.is_loopback())
}
/// Chooses the metadata source to read `doi` from.
///
/// An explicit `source` is returned lower-cased as is. Otherwise the DOI's
/// registration agency is looked up: `datacite` (compared case-insensitively)
/// selects `"datacite"`, and everything else, including a failed lookup,
/// selects `"crossref"`.
fn resolve_source(doi: &str, source: Option<&str>) -> String {
    match source {
        Some(explicit) => explicit.to_lowercase(),
        None => {
            let agency = commonmeta::doi_utils::get_doi_ra_sync(doi, false);
            let agency = agency.as_deref().map(str::to_lowercase);
            let chosen = if agency.as_deref() == Some("datacite") {
                "datacite"
            } else {
                "crossref"
            };
            chosen.to_string()
        }
    }
}
/// Router-level and helper-level tests.
///
/// HTTP tests build a fresh router with `build_app` and drive a single request
/// through it with `tower::ServiceExt::oneshot`; no socket is opened.
#[cfg(test)]
mod tests {
    use super::*;
    use axum::{body::Body, http::Request};
    use tower::ServiceExt;

    /// Creates a metadata test database inside a fresh temporary directory.
    ///
    /// The `TempDir` is returned alongside the path so the caller can keep it
    /// alive for the duration of the test.
    fn make_test_db() -> (tempfile::TempDir, Arc<PathBuf>) {
        let dir = tempfile::tempdir().unwrap();
        let path = db::tests::make_test_db(&dir.path().join("test.sqlite3"));
        (dir, Arc::new(path))
    }

    /// Creates a cache test database inside a fresh temporary directory.
    ///
    /// The `TempDir` is returned alongside the path so the caller can keep it
    /// alive for the duration of the test.
    fn make_test_cache_db() -> (tempfile::TempDir, Arc<PathBuf>) {
        let dir = tempfile::tempdir().unwrap();
        let path = db::tests::make_test_cache_db(&dir.path().join("cache.sqlite3"));
        (dir, Arc::new(path))
    }

    /// Router with neither a metadata database nor a cache configured.
    fn app_no_db() -> Router {
        build_app(AppState { db: None, cache: None })
    }

    /// Router backed by the metadata test database only (no cache).
    fn app_with_db() -> (tempfile::TempDir, Router) {
        let (dir, db) = make_test_db();
        let app = build_app(AppState { db: Some(db), cache: None });
        (dir, app)
    }

    /// Router backed by the cache test database only (no metadata database).
    fn app_with_cache_only() -> (tempfile::TempDir, Router) {
        let (dir, cache) = make_test_cache_db();
        let app = build_app(AppState { db: None, cache: Some(cache) });
        (dir, app)
    }

    /// `GET /` answers 200.
    #[tokio::test]
    async fn index_returns_200() {
        let response = app_no_db()
            .oneshot(Request::builder().uri("/").body(Body::empty()).unwrap())
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::OK);
    }

    /// The schema document is served with a JSON content type.
    #[tokio::test]
    async fn schema_json_returns_json() {
        let response = app_no_db()
            .oneshot(
                Request::builder()
                    .uri("/commonmeta_v1.0.json")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::OK);
        let ct = response.headers().get("content-type").unwrap().to_str().unwrap();
        assert!(ct.contains("application/json"));
    }

    /// A single path segment that is not a DOI answers 404.
    #[tokio::test]
    async fn non_doi_path_returns_404() {
        let response = app_no_db()
            .oneshot(
                Request::builder()
                    .uri("/not-a-doi")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::NOT_FOUND);
    }

    /// A multi-segment path that is not a DOI answers 404.
    #[tokio::test]
    async fn arbitrary_text_path_returns_404() {
        let response = app_no_db()
            .oneshot(
                Request::builder()
                    .uri("/hello/world")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::NOT_FOUND);
    }

    /// An `Accept` header naming only an unsupported media type answers 406.
    #[tokio::test]
    async fn unsupported_accept_returns_406() {
        let response = app_no_db()
            .oneshot(
                Request::builder()
                    .uri("/10.1234/test")
                    .header("Accept", "application/rdf+xml")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::NOT_ACCEPTABLE);
    }

    /// An `Accept` header listing several unsupported media types answers 406.
    #[tokio::test]
    async fn multiple_unsupported_accept_returns_406() {
        let response = app_no_db()
            .oneshot(
                Request::builder()
                    .uri("/10.1234/test")
                    .header("Accept", "application/rdf+xml, image/png")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::NOT_ACCEPTABLE);
    }

    /// An unknown `?format=` value answers 406.
    #[tokio::test]
    async fn invalid_format_query_param_returns_406() {
        let response = app_no_db()
            .oneshot(
                Request::builder()
                    .uri("/10.1234/test?format=nonsense")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::NOT_ACCEPTABLE);
    }

    /// `Accept: text/html` for a cached DOI answers 200 with an HTML body.
    // NOTE: despite the `_returns_redirect` suffix, this test asserts 200 and
    // a `text/html` content type. The name is kept so existing test filters
    // keep matching.
    #[tokio::test]
    async fn html_accept_with_cache_returns_redirect() {
        let (_dir, app) = app_with_cache_only();
        let response = app
            .oneshot(
                Request::builder()
                    .uri("/10.5678/cached")
                    .header("Accept", "text/html")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::OK);
        let ct = response.headers().get("content-type").unwrap().to_str().unwrap();
        assert!(ct.contains("text/html"), "unexpected content-type: {ct}");
    }

    /// `Accept: application/x-bibtex` for a cached DOI answers 200 with a
    /// BibTeX body (checked by its leading `@`).
    #[tokio::test]
    async fn bibtex_accept_with_cache_returns_200() {
        let (_dir, app) = app_with_cache_only();
        let response = app
            .oneshot(
                Request::builder()
                    .uri("/10.5678/cached")
                    .header("Accept", "application/x-bibtex")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::OK);
        let ct = response.headers().get("content-type").unwrap().to_str().unwrap();
        assert!(ct.contains("application/x-bibtex"), "unexpected content-type: {ct}");
        let body = axum::body::to_bytes(response.into_body(), 1024 * 1024).await.unwrap();
        let text = std::str::from_utf8(&body).unwrap();
        assert!(text.starts_with('@'), "bibtex should start with '@': {text}");
    }

    /// `Accept: text/html` for a DOI in the database answers 200 with an HTML
    /// body.
    // NOTE: despite the `_returns_redirect` suffix, this test asserts 200 and
    // a `text/html` content type. The name is kept so existing test filters
    // keep matching.
    #[tokio::test]
    async fn html_accept_with_db_returns_redirect() {
        let (_dir, app) = app_with_db();
        let response = app
            .oneshot(
                Request::builder()
                    .uri("/10.1234/test")
                    .header("Accept", "text/html")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::OK);
        let ct = response.headers().get("content-type").unwrap().to_str().unwrap();
        assert!(ct.contains("text/html"), "unexpected content-type: {ct}");
    }

    /// `Accept: application/x-bibtex` for a DOI in the database answers 200
    /// with a BibTeX body (checked by its leading `@`).
    #[tokio::test]
    async fn bibtex_accept_with_db_returns_200() {
        let (_dir, app) = app_with_db();
        let response = app
            .oneshot(
                Request::builder()
                    .uri("/10.1234/test")
                    .header("Accept", "application/x-bibtex")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::OK);
        let ct = response.headers().get("content-type").unwrap().to_str().unwrap();
        assert!(ct.contains("application/x-bibtex"), "unexpected content-type: {ct}");
        let body = axum::body::to_bytes(response.into_body(), 1024 * 1024)
            .await
            .unwrap();
        let text = std::str::from_utf8(&body).unwrap();
        assert!(text.starts_with('@'), "bibtex should start with '@': {text}");
    }

    /// `?format=bibtex` selects BibTeX without any `Accept` header.
    #[tokio::test]
    async fn format_query_param_with_db_returns_bibtex() {
        let (_dir, app) = app_with_db();
        let response = app
            .oneshot(
                Request::builder()
                    .uri("/10.1234/test?format=bibtex")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::OK);
        let ct = response.headers().get("content-type").unwrap().to_str().unwrap();
        assert!(ct.contains("application/x-bibtex"), "unexpected content-type: {ct}");
    }

    /// `/dois/{doi}` answers with a DataCite JSON content type and a JSON body
    /// carrying an `id` or `doi` field.
    #[tokio::test]
    async fn dois_route_returns_datacite_json() {
        let (_dir, app) = app_with_db();
        let response = app
            .oneshot(
                Request::builder()
                    .uri("/dois/10.1234/test")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::OK);
        let ct = response.headers().get("content-type").unwrap().to_str().unwrap();
        assert!(ct.contains("datacite.datacite+json"), "unexpected content-type: {ct}");
        let body = axum::body::to_bytes(response.into_body(), 1024 * 1024).await.unwrap();
        let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
        assert!(
            v.get("id").is_some() || v.get("doi").is_some(),
            "no id/doi field in datacite json"
        );
    }

    /// `/works/{doi}` answers with a Crossref JSON content type and a JSON body
    /// carrying `message.DOI`.
    #[tokio::test]
    async fn works_route_returns_crossref_json() {
        let (_dir, app) = app_with_db();
        let response = app
            .oneshot(
                Request::builder()
                    .uri("/works/10.1234/test")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::OK);
        let ct = response.headers().get("content-type").unwrap().to_str().unwrap();
        assert!(ct.contains("vnd.crossref+json"), "unexpected content-type: {ct}");
        let body = axum::body::to_bytes(response.into_body(), 1024 * 1024).await.unwrap();
        let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
        assert!(
            v["message"].get("DOI").is_some(),
            "no message.DOI field in crossref json: {v}"
        );
    }

    /// The CSL-JSON media type is negotiated for a DOI in the database.
    #[tokio::test]
    async fn csl_json_accept_with_db_returns_200() {
        let (_dir, app) = app_with_db();
        let response = app
            .oneshot(
                Request::builder()
                    .uri("/10.1234/test")
                    .header("Accept", "application/vnd.citationstyles.csl+json")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::OK);
        let ct = response.headers().get("content-type").unwrap().to_str().unwrap();
        assert!(ct.contains("citationstyles.csl+json"), "unexpected content-type: {ct}");
    }

    /// `html_escape` replaces `&`, `"`, `<` and `>` with HTML entities and
    /// leaves other text untouched.
    // NOTE(review): the expected values assume named entities (`&quot;` rather
    // than a numeric form such as `&#34;`) — confirm against `html_escape`.
    #[test]
    fn html_escape_replaces_special_chars() {
        assert_eq!(html_escape("a & b"), "a &amp; b");
        assert_eq!(html_escape("say \"hi\""), "say &quot;hi&quot;");
        assert_eq!(html_escape("<script>"), "&lt;script&gt;");
        assert_eq!(html_escape("no change"), "no change");
    }

    /// A single work object yields title, url, type, description and
    /// twitter:card tags.
    #[test]
    fn build_og_tags_work_json() {
        let json = r#"{"id":"https://doi.org/10.1234/test","title":"A Title","type":"JournalArticle","description":"Abstract text"}"#;
        let tags = build_og_tags(json);
        assert!(
            tags.contains(r#"property="og:title" content="A Title""#),
            "missing title: {tags}"
        );
        assert!(
            tags.contains(r#"property="og:url" content="https://doi.org/10.1234/test""#),
            "missing url: {tags}"
        );
        assert!(
            tags.contains(r#"property="og:type" content="article""#),
            "missing type: {tags}"
        );
        assert!(
            tags.contains(r#"property="og:description" content="Abstract text""#),
            "missing description: {tags}"
        );
        assert!(tags.contains(r#"name="twitter:card""#), "missing twitter:card: {tags}");
    }

    /// No `og:description` tag is emitted when the input has no description.
    #[test]
    fn build_og_tags_no_description_omits_tag() {
        let json = r#"{"id":"https://doi.org/10.1234/test","title":"A Title","type":"JournalArticle"}"#;
        let tags = build_og_tags(json);
        assert!(
            !tags.contains("og:description"),
            "should not emit og:description when empty: {tags}"
        );
    }

    /// Special characters in the title are HTML-escaped in the emitted tags.
    // NOTE(review): assumes named entities (`&amp;`, `&quot;`) — confirm
    // against `html_escape`.
    #[test]
    fn build_og_tags_escapes_html_in_title() {
        let json = r#"{"id":"https://doi.org/10.1234/x","title":"Rock & Roll: \"Loud\"","type":"JournalArticle"}"#;
        let tags = build_og_tags(json);
        assert!(
            tags.contains("Rock &amp; Roll: &quot;Loud&quot;"),
            "title not escaped: {tags}"
        );
    }

    /// An array led by a Person record yields a `profile` card with the
    /// person's full name and ORCID URL.
    #[test]
    fn build_og_tags_orcid_person_array() {
        let json = r#"[{"id":"https://orcid.org/0000-0001-2345-6789","given_name":"Jane","family_name":"Doe","type":"Person"},{"id":"https://doi.org/10.1/w","title":"A Work"}]"#;
        let tags = build_og_tags(json);
        assert!(tags.contains(r#"content="Jane Doe""#), "missing person name: {tags}");
        assert!(
            tags.contains(r#"property="og:type" content="profile""#),
            "wrong og:type for ORCID: {tags}"
        );
        assert!(
            tags.contains(r#"content="https://orcid.org/0000-0001-2345-6789""#),
            "missing ORCID url: {tags}"
        );
    }

    /// An array holding an Organization record yields a `website` card with
    /// the organization name.
    #[test]
    fn build_og_tags_ror_org_array() {
        let json = r#"[{"id":"https://ror.org/01234567","name":"State University","type":"Organization"}]"#;
        let tags = build_og_tags(json);
        assert!(tags.contains(r#"content="State University""#), "missing org name: {tags}");
        assert!(
            tags.contains(r#"property="og:type" content="website""#),
            "wrong og:type for ROR: {tags}"
        );
    }

    /// The HTML page served for a DOI in the database embeds the Open Graph
    /// and Twitter card tags.
    // The asserted title text presumably comes from the fixture created by
    // `db::tests::make_test_db` — verify there if this assertion fails.
    #[tokio::test]
    async fn html_response_includes_og_tags() {
        let (_dir, app) = app_with_db();
        let response = app
            .oneshot(
                Request::builder()
                    .uri("/10.1234/test")
                    .header("Accept", "text/html")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::OK);
        let body = axum::body::to_bytes(response.into_body(), 4 * 1024 * 1024).await.unwrap();
        let html = std::str::from_utf8(&body).unwrap();
        assert!(html.contains(r#"property="og:title""#), "missing og:title tag in HTML");
        assert!(html.contains(r#"property="og:url""#), "missing og:url tag in HTML");
        assert!(html.contains(r#"property="og:type""#), "missing og:type tag in HTML");
        assert!(
            html.contains("Test Article on Content Negotiation"),
            "missing title value in HTML"
        );
        assert!(html.contains(r#"name="twitter:card""#), "missing twitter:card in HTML");
    }

    /// The HTML page served from the cache embeds the Open Graph tags.
    // The asserted title text presumably comes from the fixture created by
    // `db::tests::make_test_cache_db` — verify there if this assertion fails.
    #[tokio::test]
    async fn cached_html_response_includes_og_tags() {
        let (_dir, app) = app_with_cache_only();
        let response = app
            .oneshot(
                Request::builder()
                    .uri("/10.5678/cached")
                    .header("Accept", "text/html")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(response.status(), StatusCode::OK);
        let body = axum::body::to_bytes(response.into_body(), 4 * 1024 * 1024).await.unwrap();
        let html = std::str::from_utf8(&body).unwrap();
        assert!(
            html.contains(r#"property="og:title""#),
            "missing og:title tag in cached HTML"
        );
        assert!(html.contains("Cached Article"), "missing cached title value in HTML");
    }
}