// tohaya 0.1.2
//
// Convert citation file formats to hayagriva YAML.
// Documentation
use biblib::CitationParser;
use hayagriva::types::{
    Date, EntryType, FormatString, MaybeTyped, Person, Publisher, QualifiedUrl,
};
use std::sync::LazyLock;
use unic_langid_impl::LanguageIdentifier;

/// Parses PubMed-formatted text into a hayagriva library.
///
/// The input is handed to biblib's `PubMedParser`; every citation it yields
/// is converted with `to_entry` and the results are collected into a
/// `hayagriva::Library`.
///
/// # Errors
///
/// Returns a `ParseError` if the parser rejects the input or if any single
/// citation fails to convert; collection stops at the first failure.
pub(crate) fn parse_pubmed<S: AsRef<str>>(
    input: S,
) -> Result<hayagriva::Library, crate::ParseError> {
    let text: &str = input.as_ref();
    biblib::PubMedParser::new()
        .parse(text)?
        .into_iter()
        .map(to_entry)
        .collect()
}

/// Converts one parsed citation into a hayagriva entry.
///
/// The entry key comes from `key_of` and the entry type from
/// `entry_type_of`. Title and authors are always set. When the citation names
/// a journal (abbreviated or full), a `Periodical` parent is attached that
/// carries the journal title, volume, issue and publisher. Date, page range,
/// first ISSN, DOI, PMID, PMCID, abstract, first URL and language are copied
/// when present.
///
/// # Errors
///
/// Returns a `ParseError` when `entry_type_of` does not recognise the
/// citation's types. An unparsable URL is *not* an error: it is skipped.
fn to_entry(citation: biblib::Citation) -> Result<hayagriva::Entry, crate::ParseError> {
    let key = key_of(&citation);
    let entry_type = entry_type_of(&citation)?;
    let mut entry = hayagriva::Entry::new(&key, entry_type);
    entry.set_title(FormatString::with_value(citation.title));
    entry.set_authors(citation.authors.into_iter().map(to_person).collect());

    // The parent's key prefers the abbreviation and falls back to the full
    // journal name; no parent is created when neither is available.
    if let Some(journal) = citation
        .journal_abbr
        .as_deref()
        .or(citation.journal.as_deref())
    {
        let mut parent = hayagriva::Entry::new(journal, EntryType::Periodical);
        if let Some(value) = citation.journal {
            // Full name known: use it as the title and the abbreviation (or,
            // failing that, the full name again) as the short form.
            let short = citation.journal_abbr.unwrap_or_else(|| value.to_string());
            parent.set_title(FormatString::with_short(value, short));
        } else if let Some(value) = citation.journal_abbr {
            // Only the abbreviation is known.
            parent.set_title(FormatString::with_value(value));
        }
        if let Some(volume) = citation.volume {
            parent.set_volume(MaybeTyped::String(volume));
        }
        if let Some(issue) = citation.issue {
            parent.set_issue(MaybeTyped::String(issue));
        }
        if let Some(publisher) = citation.publisher {
            let name = FormatString::with_value(publisher);
            parent.set_publisher(Publisher::new(Some(name), None));
        }
        entry.set_parents(vec![parent]);
    }

    if let Some(date) = citation.date {
        // hayagriva's `Date` stores both month and day zero-based, so both
        // are shifted down by one. `checked_sub` turns an out-of-range 0 from
        // the parser into `None` instead of wrapping around.
        // NOTE(review): assumes biblib reports the day 1-based, like the month.
        entry.set_date(Date {
            year: date.year,
            month: date.month.and_then(|m| m.checked_sub(1)),
            day: date.day.and_then(|d| d.checked_sub(1)),
            approximate: false,
        });
    }
    if let Some(pages) = citation.pages {
        entry.set_page_range(MaybeTyped::String(pages));
    }
    // Only the first ISSN is kept.
    if let Some(issn) = citation.issn.into_iter().next() {
        entry.set_issn(issn);
    }
    if let Some(doi) = citation.doi {
        entry.set_doi(doi);
    }
    if let Some(pmid) = citation.pmid {
        entry.set_pmid(pmid);
    }
    if let Some(pmcid) = citation.pmc_id {
        entry.set_pmcid(pmcid);
    }
    if let Some(abstract_) = citation.abstract_text {
        entry.set_abstract_(FormatString::with_value(abstract_));
    }
    // Best effort: only the first URL is considered, and a URL that does not
    // parse is dropped rather than failing the whole citation.
    if let Some(value) = citation
        .urls
        .first()
        .and_then(|raw| url::Url::parse(raw).ok())
    {
        entry.set_url(QualifiedUrl::new(value, None));
    }
    // Only the language code "eng" is mapped; any other value is left unset.
    if let Some(language) = &citation.language {
        if language == "eng" {
            entry.set_language(ENGLISH.clone());
        }
    }
    Ok(entry)
}

/// Maps a biblib author onto a hayagriva `Person`.
///
/// Only `name` and `given_name` are carried over; prefix, suffix and alias
/// are always left unset.
fn to_person(author: biblib::Author) -> Person {
    let biblib::Author {
        name, given_name, ..
    } = author;
    Person {
        name,
        given_name,
        prefix: None,
        suffix: None,
        alias: None,
    }
}

/// Derives the entry key for a citation.
///
/// The key is the first author's name, lowercased and stripped of every
/// non-alphanumeric character, followed by the publication year when a date
/// is present. If there is no author, or the author's name contains no
/// alphanumeric character at all, the literal `"key"` is returned so that the
/// key is never empty or a bare year.
fn key_of(citation: &biblib::Citation) -> String {
    let stem = citation
        .authors
        .first()
        .map(|author| {
            author
                .name
                .to_lowercase()
                .replace(|c: char| !c.is_alphanumeric(), "")
        })
        // A name made only of punctuation/whitespace sanitises to "", which
        // is treated the same as having no author.
        .filter(|name| !name.is_empty());

    match (stem, &citation.date) {
        (Some(name), Some(date)) => format!("{name}{}", date.year),
        (Some(name), None) => name,
        (None, _) => "key".to_string(),
    }
}

/// Determines the hayagriva entry type for a citation.
///
/// A citation whose type list contains `"Journal Article"` maps to
/// `EntryType::Article`.
///
/// # Errors
///
/// Any citation without that type yields a `ParseError` listing the types it
/// does have.
fn entry_type_of(citation: &biblib::Citation) -> Result<EntryType, crate::ParseError> {
    let kinds = &citation.citation_type;
    let is_journal_article = kinds.iter().any(|kind| kind == "Journal Article");

    if !is_journal_article {
        let listed = kinds.join(", ");
        return Err(crate::ParseError(format!(
            "Unsupported citation types: {}",
            listed
        )));
    }
    Ok(EntryType::Article)
}

/// Language identifier for English (`en`), built once on first use and cloned
/// into each entry whose citation language is `"eng"`.
static ENGLISH: LazyLock<LanguageIdentifier> = LazyLock::new(|| {
    // The input is a fixed literal, so a parse failure here is a programming
    // error rather than a runtime condition.
    LanguageIdentifier::from_bytes(b"en").expect("\"en\" is a valid language identifier")
});