use clap::{Arg, ArgAction, ArgMatches, Command};
use std::path::Path;
use crate::cmd::resolve_db_path;
use crate::doi_utils::get_doi_ra_sync;
/// Builds the clap definition of the `convert` subcommand.
///
/// Declares one required positional `input` plus the options `--from`,
/// `--to` (default `commonmeta`), `--style`, `--locale`, `--file`,
/// `--no-network`, `--network`, `--number` (default `100`) and `--page`
/// (default `1`). The parsed matches are consumed by [`execute`].
pub fn command() -> Command {
    Command::new("convert")
        .about("Convert scholarly metadata between formats")
        .long_about(
            "Convert scholarly metadata between formats.\n\n\
             The input is a file path, a DOI/URL, or a ROR organization ID. \
             When --from is omitted the format is auto-detected: DOIs are \
             resolved via the DOI RA API; ROR URLs are detected by pattern; \
             JSON files are inspected for schema markers.\n\n\
             For ROR input, a local 'commonmeta.sqlite3' in the current \
             directory (produced by 'commonmeta list --to ror --file \
             commonmeta.sqlite3') is queried first — faster and offline. \
             The ROR API is used as a fallback when no local database exists.\n\n\
             Supported input formats: crossref, commonmeta, ror\n\
             Supported output formats: commonmeta, csl, citation, ror, inveniordm\n\n\
             Examples:\n\n\
             commonmeta convert 10.5555/12345678\n\
             commonmeta convert https://doi.org/10.59350/gj8re-sca95 --to csl\n\
             commonmeta convert https://ror.org/02nr0ka47\n\
             commonmeta convert https://ror.org/02nr0ka47 --to inveniordm\n\
             commonmeta convert record.json --from commonmeta --to csl --file out.json",
        )
        .arg(
            Arg::new("input")
                .help("File path, DOI, URL, or ROR ID")
                .required(true)
                .index(1),
        )
        .arg(
            Arg::new("from")
                .long("from")
                .short('f')
                .help("Input format (crossref, commonmeta, ror); auto-detected if omitted"),
        )
        .arg(
            // `citation` is listed because `execute` routes it through the
            // citation formatter that --style and --locale configure.
            Arg::new("to")
                .long("to")
                .short('t')
                .help("Output format (commonmeta, csl, citation, ror, inveniordm)")
                .default_value("commonmeta"),
        )
        .arg(
            Arg::new("style")
                .long("style")
                .short('s')
                .help("CSL style name for citation output (default: apa)"),
        )
        .arg(
            Arg::new("locale")
                .long("locale")
                .short('l')
                .help("BCP 47 locale for citation output (e.g. de-DE)"),
        )
        .arg(
            Arg::new("file")
                .long("file")
                .help("Write output to this file instead of stdout"),
        )
        .arg(
            Arg::new("no-network")
                .long("no-network")
                .help("Disable all outbound network requests; fails if the operation would require network access")
                .action(ArgAction::SetTrue),
        )
        // The three options below are honoured by both the ORCID and the ROR
        // branch of `execute`, so their help text names both input kinds.
        .arg(
            Arg::new("network")
                .long("network")
                .help("For ORCID or ROR input: also query Crossref and DataCite even when local results exist; results are cached in cache.sqlite3")
                .action(ArgAction::SetTrue),
        )
        .arg(
            Arg::new("number")
                .long("number")
                .short('n')
                .help("For ORCID or ROR input: number of works to fetch per source (Crossref, DataCite); for ORCID input 0 fetches all (default: 100)")
                .value_parser(clap::value_parser!(usize))
                .default_value("100"),
        )
        .arg(
            Arg::new("page")
                .long("page")
                .short('p')
                .help("For ORCID or ROR input: page number (1-based) for Crossref and DataCite API requests")
                .value_parser(clap::value_parser!(usize))
                .default_value("1"),
        )
}
/// Extracts the registrant prefix (for example `10.5555`) from a DOI or DOI URL.
///
/// Known resolver URL prefixes are removed from the front of `s` first, each
/// one in the listed order. The remainder must start with `10.` and contain a
/// `/`; the text before the first `/` is returned. Anything else yields `None`.
fn doi_prefix(s: &str) -> Option<String> {
    const RESOLVER_PREFIXES: [&str; 6] = [
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "https://commonmeta.org/",
        "http://commonmeta.org/",
    ];

    let bare = RESOLVER_PREFIXES
        .iter()
        .fold(s, |rest, prefix| rest.trim_start_matches(*prefix));

    if !bare.starts_with("10.") {
        return None;
    }
    bare.split_once('/')
        .map(|(registrant, _suffix)| registrant.to_string())
}
/// Maps a DOI registration agency name to the name of the matching reader.
///
/// Accepts `Crossref`/`crossref` and `DataCite`/`datacite` exactly as
/// spelled; every other value returns `None`.
pub(crate) fn ra_to_reader(ra: &str) -> Option<&'static str> {
    if matches!(ra, "Crossref" | "crossref") {
        return Some("crossref");
    }
    if matches!(ra, "DataCite" | "datacite") {
        return Some("datacite");
    }
    None
}
/// Guesses the format name for `input` when `--from` was not given.
///
/// Checks run in this order and the first hit wins:
/// 1. identifier type reported by `crate::utils::validate_id` (`ROR` → `ror`,
///    `ORCID` → `orcid`);
/// 2. DOI-shaped input → whatever `get_doi_ra_sync` reports, or `crossref`
///    when it reports nothing;
/// 3. an OpenAlex ID starting with `W` → `openalex`;
/// 4. PubMed / Europe PMC / PMC URLs, or `PMC` followed by digits → `pubmed`;
/// 5. JSON content with a commonmeta `schema_version`, a `message-type` key
///    (→ `crossref`), or a ROR `id` plus `names` (→ `ror`).
///
/// Falls back to `commonmeta` when nothing matches.
pub(crate) fn detect_format(input: &str, no_network: bool) -> String {
    let id_type = crate::utils::validate_id(input).1;
    if id_type == "ROR" {
        return "ror".to_string();
    }
    if id_type == "ORCID" {
        return "orcid".to_string();
    }

    if doi_prefix(input).is_some() {
        return match get_doi_ra_sync(input, no_network) {
            Some(agency) => agency,
            None => "crossref".to_string(),
        };
    }

    let is_openalex_work = crate::utils::validate_openalex(input)
        .map(|id| id.starts_with('W'))
        .unwrap_or(false);
    if is_openalex_work {
        return "openalex".to_string();
    }

    let is_pubmed_url = [
        "pubmed.ncbi.nlm.nih.gov",
        "europepmc.org/article",
        "pmc.ncbi.nlm.nih.gov/articles",
    ]
    .iter()
    .any(|marker| input.contains(*marker));
    // A bare PMCID: the literal "PMC" followed by at least one ASCII digit and nothing else.
    let is_pmcid = matches!(
        input.strip_prefix("PMC"),
        Some(digits) if !digits.is_empty() && digits.chars().all(|c| c.is_ascii_digit())
    );
    if is_pubmed_url || is_pmcid {
        return "pubmed".to_string();
    }

    if let Ok(doc) = serde_json::from_str::<serde_json::Value>(input) {
        // True when `key` holds a JSON string that begins with `prefix`.
        let string_field_starts_with = |key: &str, prefix: &str| {
            doc.get(key)
                .and_then(|field| field.as_str())
                .map(|text| text.starts_with(prefix))
                .unwrap_or(false)
        };

        if string_field_starts_with("schema_version", "https://commonmeta.org") {
            return "commonmeta".to_string();
        }
        if doc.get("message-type").is_some() {
            return "crossref".to_string();
        }
        if string_field_starts_with("id", "https://ror.org/") && doc.get("names").is_some() {
            return "ror".to_string();
        }
    }

    "commonmeta".to_string()
}
pub fn execute(matches: &ArgMatches) -> Result<(), String> {
let input_arg = matches.get_one::<String>("input").expect("required");
let out_file = matches.get_one::<String>("file");
let no_network = matches.get_flag("no-network");
let force_network = matches.get_flag("network");
let number = *matches.get_one::<usize>("number").unwrap_or(&10);
let page = *matches.get_one::<usize>("page").unwrap_or(&1);
let style = matches.get_one::<String>("style").map(String::as_str);
let locale = matches.get_one::<String>("locale").map(String::as_str);
let to_arg = matches.get_one::<String>("to").expect("has default");
let is_local_file = Path::new(input_arg).exists();
if no_network && !is_local_file && doi_prefix(input_arg).is_some() {
let bare = input_arg
.trim_start_matches("https://doi.org/")
.trim_start_matches("http://doi.org/")
.trim_start_matches("https://dx.doi.org/")
.trim_start_matches("http://dx.doi.org/");
let doi_url = format!("https://doi.org/{}", bare);
let db_path_str = resolve_db_path(None);
let db_path = Path::new(&db_path_str);
if !db_path.exists() {
return Err(format!(
"local database not found at '{}'; \
run 'commonmeta import {}' or remove --no-network",
db_path_str, input_arg
));
}
let not_found = || format!(
"'{}' not found in local database '{}'; \
run 'commonmeta import {}' or remove --no-network",
input_arg, db_path_str, input_arg
);
let data = match crate::read_sqlite_by_id(&doi_url, db_path) {
Ok(Some(d)) => d,
Ok(None) => return Err(not_found()),
Err(e) if e.to_string().contains("no such table") => return Err(not_found()),
Err(e) => return Err(e.to_string()),
};
let json = serde_json::to_string(&data).map_err(|e| e.to_string())?;
let to_arg = to_arg.as_str();
let output = if to_arg == "citation" {
crate::convert_citation("commonmeta", &json, style, locale)
.map_err(|e| e.to_string())?
} else {
crate::convert("commonmeta", to_arg, &json).map_err(|e| e.to_string())?
};
return write_output(&output, to_arg, out_file);
}
let input = if is_local_file {
std::fs::read_to_string(input_arg)
.map_err(|e| format!("failed to read '{}': {}", input_arg, e))?
} else {
input_arg.clone()
};
let from_explicit = matches.get_one::<String>("from");
let from_detected = from_explicit.is_none().then(|| detect_format(&input, no_network));
if let Some(ref ra) = from_detected {
if doi_prefix(input_arg).is_some() && ra_to_reader(ra).is_none() {
return Err(format!(
"convert: no reader available for DOI registration agency '{}' (prefix {})",
ra,
doi_prefix(input_arg).unwrap_or_default()
));
}
}
let from = from_explicit
.map(|f| f.as_str().to_string())
.or_else(|| from_detected.as_deref().and_then(ra_to_reader).map(str::to_string))
.unwrap_or_else(|| from_detected.unwrap_or_else(|| "commonmeta".to_string()));
let to_arg = to_arg.as_str();
let to = to_arg;
if from == "orcid" {
let orcid_url = crate::utils::normalize_orcid(&input);
if orcid_url.is_empty() {
return Err(format!("'{}' is not a valid ORCID identifier", input));
}
let db_path_str = resolve_db_path(None);
let db_path = Path::new(&db_path_str);
let value = if db_path.exists() {
match crate::fetch_orcid_person_json_sqlite(&orcid_url, db_path) {
Ok(v) => Ok(v),
Err(_) if no_network => return Err(format!(
"'{}' not found in local database '{}'; \
run 'commonmeta import {}' or remove --no-network",
input, db_path_str, input
)),
Err(_) => crate::fetch_orcid_person_json(&orcid_url),
}
} else if no_network {
return Err(format!(
"ORCID lookup requires network access (no local database at '{}'); \
run 'commonmeta import {}' or remove --no-network",
db_path_str, input
));
} else {
crate::fetch_orcid_person_json(&orcid_url)
}
.map_err(|e| e.to_string())?;
let needs_affiliations = matches!(to, "commonmeta" | "inveniordm");
let affiliations = if needs_affiliations {
let sqlite_affs = if db_path.exists() {
crate::fetch_orcid_affiliations_sqlite(&orcid_url, db_path)
} else {
Vec::new()
};
if !sqlite_affs.is_empty() {
sqlite_affs
} else if !no_network {
let ror_db = if db_path.exists() { Some(db_path) } else { None };
crate::fetch_orcid_affiliations(&orcid_url, ror_db).unwrap_or_default()
} else {
Vec::new()
}
} else {
Vec::new()
};
let works = if to == "commonmeta" {
let sqlite_works = if db_path.exists() && page == 1 {
crate::read_sqlite_by_orcid(&orcid_url, db_path).unwrap_or_default()
} else {
Vec::new()
};
let need_network = !no_network && (sqlite_works.is_empty() || force_network || page > 1);
let network_works = if need_network {
if number == 0 {
let mut cr = crate::fetch_all_crossref_by_orcid(&orcid_url).unwrap_or_default();
let mut dc = crate::fetch_all_datacite_by_orcid(&orcid_url).unwrap_or_default();
cr.append(&mut dc);
cr
} else {
let mut cr = crate::fetch_crossref_by_orcid(&orcid_url, number, page)
.unwrap_or_default();
let mut dc = crate::fetch_datacite_by_orcid(&orcid_url, number, page)
.unwrap_or_default();
cr.append(&mut dc);
cr
}
} else {
Vec::new()
};
if !network_works.is_empty() {
let cache_path_str = crate::cmd::resolve_cache_db_path(None);
let cache_path = std::path::Path::new(&cache_path_str);
if let Some(parent) = cache_path.parent() {
let _ = std::fs::create_dir_all(parent);
}
let _ = crate::upsert_sqlite(&network_works, cache_path);
}
let mut seen = std::collections::HashSet::new();
let mut all: Vec<crate::Data> = sqlite_works
.into_iter()
.chain(network_works)
.filter(|d| seen.insert(d.id.clone()))
.collect();
all.sort_by(|a, b| b.date_published.cmp(&a.date_published));
if number > 0 {
all.truncate(number);
}
all
} else {
Vec::new()
};
let output = match to {
"inveniordm" => crate::write_orcid_inveniordm_yaml(&value, &affiliations)
.map_err(|e| e.to_string())?,
"orcid" => crate::write_orcid_json(&value).map_err(|e| e.to_string())?,
_ => crate::write_orcid_commonmeta(&value, &affiliations, &works)
.map_err(|e| e.to_string())?,
};
return write_output(&output, to, out_file);
}
if from == "ror" {
let ror_url = crate::utils::normalize_ror(&input);
if ror_url.is_empty() {
return Err(format!("'{}' is not a valid ROR identifier", input));
}
let db_path_str = resolve_db_path(None);
let db_path = Path::new(&db_path_str);
if to != "commonmeta" && to != "inveniordm" {
let mut raw = if db_path.exists() {
match crate::fetch_ror_raw_sqlite(&ror_url, db_path) {
Ok(r) => Ok(r),
Err(_) if no_network => return Err(format!(
"'{}' not found in local database '{}'; run 'commonmeta import --from ror'",
input, db_path_str
)),
Err(_) => crate::fetch_ror_raw(&ror_url),
}
} else if no_network {
return Err(format!(
"ROR lookup requires network access (no local database at '{}'); run 'commonmeta import --from ror'",
db_path_str
));
} else {
crate::fetch_ror_raw(&ror_url)
}.map_err(|e| e.to_string())?;
if db_path.exists() {
crate::enrich_ror_locations(&mut raw, db_path);
}
let output = crate::write_ror_v2_json(&raw).map_err(|e| e.to_string())?;
return write_output(&output, to, out_file);
}
let data = if db_path.exists() {
match crate::fetch_ror_sqlite(&ror_url, db_path) {
Ok(d) => Ok(d),
Err(_) if no_network => return Err(format!(
"'{}' not found in local database '{}'; \
run 'commonmeta import --from ror' or remove --no-network",
input, db_path_str
)),
Err(_) => crate::fetch_ror(&ror_url),
}
} else if no_network {
return Err(format!(
"ROR lookup requires network access (no local database at '{}'); \
run 'commonmeta import --from ror' or remove --no-network",
db_path_str
));
} else {
crate::fetch_ror(&ror_url)
}
.map_err(|e| e.to_string())?;
if to == "inveniordm" {
let output = crate::write("ror", &data).map_err(|e| e.to_string())?;
return write_output(&output, to, out_file);
}
let works_db = Path::new(&db_path_str);
let sqlite_works = if works_db.exists() && page == 1 {
crate::read_sqlite_by_ror(&ror_url, works_db).unwrap_or_default()
} else {
Vec::new()
};
let need_network = !no_network && (sqlite_works.is_empty() || force_network || page > 1);
let network_works = if need_network {
let mut cr = crate::fetch_crossref_by_ror(&ror_url, number, page).unwrap_or_default();
let mut dc = crate::fetch_datacite_by_ror(&ror_url, number, page).unwrap_or_default();
cr.append(&mut dc);
cr
} else {
Vec::new()
};
if !network_works.is_empty() {
let cache_path_str = crate::cmd::resolve_cache_db_path(None);
let cache_path = std::path::Path::new(&cache_path_str);
if let Some(parent) = cache_path.parent() {
let _ = std::fs::create_dir_all(parent);
}
let _ = crate::upsert_sqlite(&network_works, cache_path);
}
let mut seen = std::collections::HashSet::new();
let mut all: Vec<crate::Data> = sqlite_works
.into_iter()
.chain(network_works)
.filter(|d| seen.insert(d.id.clone()))
.collect();
all.sort_by(|a, b| b.date_published.cmp(&a.date_published));
all.truncate(number);
let org_val = serde_json::to_value(&data).map_err(|e| e.to_string())?;
let mut items = vec![org_val];
for work in &all {
let prepared = crate::prepare_commonmeta(work);
items.push(serde_json::to_value(&prepared).map_err(|e| e.to_string())?);
}
let output = serde_json::to_vec_pretty(&items).map_err(|e| e.to_string())?;
return write_output(&output, to, out_file);
}
if to == "commonmeta" {
let mut data = crate::read(&from, &input).map_err(|e| e.to_string())?;
let db_path_str = resolve_db_path(None);
let db_path = Path::new(&db_path_str);
crate::enrich_citations(&mut data, db_path);
let related_works = crate::fetch_reference_works(&data, Some(db_path), no_network);
let mut main_prepared = crate::prepare_commonmeta(&data);
main_prepared.references.clear();
let main = serde_json::to_value(main_prepared).map_err(|e| e.to_string())?;
let mut items = vec![main];
for work in &related_works {
let mut prepared = crate::prepare_commonmeta(work);
prepared.references.clear();
items.push(serde_json::to_value(prepared).map_err(|e| e.to_string())?);
}
let output = serde_json::to_vec_pretty(&items).map_err(|e| e.to_string())?;
return write_output(&output, to, out_file);
}
let output = if to == "citation" {
crate::convert_citation(&from, &input, style, locale).map_err(|e| e.to_string())?
} else {
crate::convert(&from, to, &input).map_err(|e| e.to_string())?
};
write_output(&output, to, out_file)
}
/// Writes converted output to `out_file`, or to stdout when no file is given.
///
/// For every target except `inveniordm`, output that parses as JSON is
/// re-serialized in pretty-printed form; output that does not parse as JSON
/// and all `inveniordm` output is written unchanged. Stdout output is decoded
/// lossily as UTF-8 and followed by a newline.
///
/// # Errors
///
/// Returns a message naming the path when writing the file fails.
fn write_output(output: &[u8], to: &str, out_file: Option<&String>) -> Result<(), String> {
    let reformatted: Option<Vec<u8>> = if to == "inveniordm" {
        None
    } else {
        serde_json::from_slice::<serde_json::Value>(output)
            .ok()
            .and_then(|parsed| serde_json::to_vec_pretty(&parsed).ok())
    };
    // Fall back to the bytes as received when nothing was reformatted.
    let bytes: &[u8] = reformatted.as_deref().unwrap_or(output);

    if let Some(path) = out_file {
        return std::fs::write(path, bytes)
            .map_err(|e| format!("failed to write '{}': {}", path, e));
    }

    println!("{}", String::from_utf8_lossy(bytes));
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `args` with the `convert` command definition, panicking on a parse error.
    fn parse_args(args: &[&str]) -> clap::ArgMatches {
        command()
            .try_get_matches_from(args)
            .expect("arg parse failed")
    }

    /// Runs `convert --no-network <doi>`. Success is accepted; a failure must
    /// be the local-database error rather than anything network related.
    fn assert_offline_doi_outcome(doi: &str) {
        let parsed = parse_args(&["convert", "--no-network", doi]);
        if let Err(e) = execute(&parsed) {
            assert!(
                e.contains("not found") || e.contains("--no-network"),
                "expected local-db error, got: {e}"
            );
        }
    }

    #[test]
    fn test_no_network_with_doi_uses_local_db() {
        assert_offline_doi_outcome("10.7554/elife.01567");
    }

    #[test]
    fn test_no_network_with_doi_url_uses_local_db() {
        assert_offline_doi_outcome("https://doi.org/10.7554/elife.01567");
    }

    /// Inline JSON is not a DOI, so the offline DOI guard must not produce the error.
    #[test]
    fn test_no_network_with_local_json_passes_guard() {
        let parsed = parse_args(&["convert", "--no-network", r#"{"type":"JournalArticle"}"#]);
        let err = execute(&parsed).unwrap_err();
        assert!(
            !err.contains("--no-network"),
            "should not fail at network guard for inline JSON, got: {err}"
        );
    }
}