// dragoman 0.3.5
//
// DOI redirection and content negotiation server
import katex from 'katex'

/**
 * Localized display labels for resource types, keyed first by locale tag
 * (e.g. 'de-DE') and then by the resource type identifier (e.g. 'JournalArticle').
 *
 * Every locale lists the same set of type keys, and every label must be a
 * non-empty string: typeLabel() only falls back to a generated label when
 * an entry is missing, so an empty entry would be displayed as a blank label.
 */
export const TYPE_LABELS = {
  'de-DE': {
    Article: 'Artikel', Audiovisual: 'Audiovisuell', BlogPost: 'Blogbeitrag',
    Book: 'Buch', BookChapter: 'Buchkapitel', BookSeries: 'Buchreihe',
    Collection: 'Sammlung', Component: 'Komponente', ComputationalNotebook: 'Computational Notebook',
    DataPaper: 'Datenpapier', Dataset: 'Datensatz', Dissertation: 'Dissertation',
    Document: 'Dokument', Entry: 'Eintrag', Event: 'Veranstaltung',
    Figure: 'Abbildung', Grant: 'Förderantrag', Image: 'Bild',
    Instrument: 'Instrument', InteractiveResource: 'Interaktive Ressource',
    Journal: 'Zeitschrift', JournalArticle: 'Zeitschriftenartikel',
    LegalDocument: 'Rechtsdokument', Manuscript: 'Manuskript', Map: 'Karte',
    Model: 'Modell', Other: 'Sonstiges', OutputManagementPlan: 'Datenmanagementplan',
    Patent: 'Patent', PeerReview: 'Fachgutachten', Performance: 'Aufführung',
    PersonalCommunication: 'Persönliche Mitteilung', PhysicalObject: 'Physisches Objekt',
    Poster: 'Poster', Presentation: 'Präsentation', Preprint: 'Preprint',
    ProceedingsArticle: 'Konferenzbeitrag', Report: 'Bericht', Service: 'Dienst',
    Software: 'Software', Sound: 'Ton', Standard: 'Norm',
    StudyRegistration: 'Studienregistrierung', WebPage: 'Webseite',
  },
  'fr-FR': {
    Article: 'Article', Audiovisual: 'Audiovisuel', BlogPost: 'Article de blog',
    Book: 'Livre', BookChapter: 'Chapitre de livre', BookSeries: 'Série de livres',
    Collection: 'Collection', Component: 'Composant', ComputationalNotebook: 'Carnet de calcul',
    DataPaper: 'Article de données', Dataset: 'Ensemble de données', Dissertation: 'Thèse',
    Document: 'Document', Entry: 'Entrée', Event: 'Événement',
    Figure: 'Figure', Grant: 'Subvention', Image: 'Image',
    Instrument: 'Instrument', InteractiveResource: 'Ressource interactive',
    Journal: 'Revue', JournalArticle: 'Article de revue',
    LegalDocument: 'Document juridique', Manuscript: 'Manuscrit', Map: 'Carte',
    Model: 'Modèle', Other: 'Autre', OutputManagementPlan: 'Plan de gestion des données',
    Patent: 'Brevet', PeerReview: 'Évaluation par les pairs', Performance: 'Représentation',
    PersonalCommunication: 'Communication personnelle', PhysicalObject: 'Objet physique',
    Poster: 'Affiche', Presentation: 'Présentation', Preprint: 'Préimpression',
    ProceedingsArticle: 'Article de conférence', Report: 'Rapport', Service: 'Service',
    Software: 'Logiciel', Sound: 'Son', Standard: 'Norme',
    StudyRegistration: "Enregistrement d'étude", WebPage: 'Page web',
  },
  'es-ES': {
    Article: 'Artículo', Audiovisual: 'Audiovisual', BlogPost: 'Entrada de blog',
    Book: 'Libro', BookChapter: 'Capítulo de libro', BookSeries: 'Serie de libros',
    Collection: 'Colección', Component: 'Componente', ComputationalNotebook: 'Cuaderno computacional',
    DataPaper: 'Artículo de datos', Dataset: 'Conjunto de datos', Dissertation: 'Tesis',
    Document: 'Documento', Entry: 'Entrada', Event: 'Evento',
    Figure: 'Figura', Grant: 'Subvención', Image: 'Imagen',
    Instrument: 'Instrumento', InteractiveResource: 'Recurso interactivo',
    Journal: 'Revista', JournalArticle: 'Artículo de revista',
    LegalDocument: 'Documento legal', Manuscript: 'Manuscrito', Map: 'Mapa',
    Model: 'Modelo', Other: 'Otro', OutputManagementPlan: 'Plan de gestión de datos',
    Patent: 'Patente', PeerReview: 'Revisión por pares', Performance: 'Actuación',
    PersonalCommunication: 'Comunicación personal', PhysicalObject: 'Objeto físico',
    Poster: 'Póster', Presentation: 'Presentación', Preprint: 'Preimpresión',
    ProceedingsArticle: 'Artículo de conferencia', Report: 'Informe', Service: 'Servicio',
    Software: 'Software', Sound: 'Sonido', Standard: 'Norma',
    StudyRegistration: 'Registro de estudio', WebPage: 'Página web',
  },
  'it-IT': {
    Article: 'Articolo', Audiovisual: 'Audiovisivo', BlogPost: 'Articolo di blog',
    Book: 'Libro', BookChapter: 'Capitolo di libro', BookSeries: 'Serie di libri',
    Collection: 'Raccolta', Component: 'Componente', ComputationalNotebook: 'Quaderno computazionale',
    DataPaper: 'Articolo di dati', Dataset: 'Set di dati', Dissertation: 'Tesi',
    Document: 'Documento', Entry: 'Voce', Event: 'Evento',
    Figure: 'Figura', Grant: 'Finanziamento', Image: 'Immagine',
    Instrument: 'Strumento', InteractiveResource: 'Risorsa interattiva',
    Journal: 'Rivista', JournalArticle: 'Articolo di rivista',
    LegalDocument: 'Documento legale', Manuscript: 'Manoscritto', Map: 'Mappa',
    Model: 'Modello', Other: 'Altro', OutputManagementPlan: 'Piano di gestione dei dati',
    Patent: 'Brevetto', PeerReview: 'Revisione paritaria', Performance: 'Rappresentazione',
    PersonalCommunication: 'Comunicazione personale', PhysicalObject: 'Oggetto fisico',
    Poster: 'Poster', Presentation: 'Presentazione', Preprint: 'Preprint',
    ProceedingsArticle: 'Atti di conferenza', Report: 'Rapporto', Service: 'Servizio',
    Software: 'Software', Sound: 'Suono', Standard: 'Norma',
    StudyRegistration: 'Registrazione dello studio', WebPage: 'Pagina web',
  },
  'ja-JP': {
    Article: '論文', Audiovisual: '映像・音声', BlogPost: 'ブログ記事',
    Book: '図書', BookChapter: '書籍の章', BookSeries: '叢書',
    Collection: 'コレクション', Component: 'コンポーネント', ComputationalNotebook: '計算ノートブック',
    DataPaper: 'データ論文', Dataset: 'データセット', Dissertation: '学位論文',
    Document: '文書', Entry: '項目', Event: 'イベント',
    Figure: '図', Grant: '研究助成', Image: '画像',
    Instrument: '機器', InteractiveResource: 'インタラクティブリソース',
    Journal: '学術誌', JournalArticle: '学術論文',
    LegalDocument: '法的文書', Manuscript: '原稿', Map: '地図',
    Model: 'モデル', Other: 'その他', OutputManagementPlan: 'データ管理計画',
    Patent: '特許', PeerReview: '査読', Performance: 'パフォーマンス',
    PersonalCommunication: '個人通信', PhysicalObject: '物理的対象',
    Poster: 'ポスター', Presentation: 'プレゼンテーション', Preprint: 'プレプリント',
    ProceedingsArticle: '会議論文', Report: '報告書', Service: 'サービス',
    Software: 'ソフトウェア', Sound: '音声', Standard: '規格',
    StudyRegistration: '研究登録', WebPage: 'ウェブページ',
  },
  'ko-KR': {
    Article: '논문', Audiovisual: '시청각 자료', BlogPost: '블로그 게시물',
    Book: '도서', BookChapter: '책 챕터', BookSeries: '도서 시리즈',
    Collection: '컬렉션', Component: '컴포넌트', ComputationalNotebook: '계산 노트북',
    DataPaper: '데이터 논문', Dataset: '데이터셋', Dissertation: '학위논문',
    Document: '문서', Entry: '항목', Event: '이벤트',
    Figure: '그림', Grant: '연구 지원금', Image: '이미지',
    Instrument: '기기', InteractiveResource: '대화형 자료',
    Journal: '학술지', JournalArticle: '학술 논문',
    LegalDocument: '법률 문서', Manuscript: '원고', Map: '지도',
    Model: '모델', Other: '기타', OutputManagementPlan: '데이터 관리 계획',
    Patent: '특허', PeerReview: '동료 심사', Performance: '공연',
    PersonalCommunication: '개인 통신', PhysicalObject: '물리적 객체',
    Poster: '포스터', Presentation: '발표', Preprint: '프리프린트',
    ProceedingsArticle: '학술대회 논문', Report: '보고서', Service: '서비스',
    Software: '소프트웨어', Sound: '음성', Standard: '표준',
    StudyRegistration: '연구 등록', WebPage: '웹 페이지',
  },
  'nl-NL': {
    Article: 'Artikel', Audiovisual: 'Audiovisueel', BlogPost: 'Blogbericht',
    Book: 'Boek', BookChapter: 'Boekhoofdstuk', BookSeries: 'Boekreeks',
    Collection: 'Collectie', Component: 'Component', ComputationalNotebook: 'Berekeningsnotebook',
    DataPaper: 'Datapaper', Dataset: 'Dataset', Dissertation: 'Proefschrift',
    Document: 'Document', Entry: 'Invoer', Event: 'Evenement',
    Figure: 'Figuur', Grant: 'Subsidie', Image: 'Afbeelding',
    Instrument: 'Instrument', InteractiveResource: 'Interactieve bron',
    Journal: 'Tijdschrift', JournalArticle: 'Tijdschriftartikel',
    LegalDocument: 'Juridisch document', Manuscript: 'Manuscript', Map: 'Kaart',
    Model: 'Model', Other: 'Overig', OutputManagementPlan: 'Data-managementplan',
    Patent: 'Octrooi', PeerReview: 'Collegiale toetsing', Performance: 'Uitvoering',
    PersonalCommunication: 'Persoonlijke communicatie', PhysicalObject: 'Fysiek object',
    Poster: 'Poster', Presentation: 'Presentatie', Preprint: 'Preprint',
    ProceedingsArticle: 'Congresartikel', Report: 'Rapport', Service: 'Dienst',
    Software: 'Software', Sound: 'Geluid', Standard: 'Norm',
    StudyRegistration: 'Studieregistratie', WebPage: 'Webpagina',
  },
  'pt-BR': {
    Article: 'Artigo', Audiovisual: 'Audiovisual', BlogPost: 'Postagem de blog',
    Book: 'Livro', BookChapter: 'Capítulo de livro', BookSeries: 'Série de livros',
    Collection: 'Coleção', Component: 'Componente', ComputationalNotebook: 'Caderno computacional',
    DataPaper: 'Artigo de dados', Dataset: 'Conjunto de dados', Dissertation: 'Dissertação',
    Document: 'Documento', Entry: 'Entrada', Event: 'Evento',
    Figure: 'Figura', Grant: 'Financiamento de pesquisa', Image: 'Imagem',
    Instrument: 'Instrumento', InteractiveResource: 'Recurso interativo',
    Journal: 'Periódico', JournalArticle: 'Artigo de periódico',
    LegalDocument: 'Documento legal', Manuscript: 'Manuscrito', Map: 'Mapa',
    Model: 'Modelo', Other: 'Outro', OutputManagementPlan: 'Plano de gestão de dados',
    Patent: 'Patente', PeerReview: 'Revisão por pares', Performance: 'Apresentação artística',
    PersonalCommunication: 'Comunicação pessoal', PhysicalObject: 'Objeto físico',
    Poster: 'Pôster', Presentation: 'Apresentação', Preprint: 'Preprint',
    ProceedingsArticle: 'Artigo de conferência', Report: 'Relatório', Service: 'Serviço',
    Software: 'Software', Sound: 'Som', Standard: 'Norma',
    StudyRegistration: 'Registro de estudo', WebPage: 'Página web',
  },
  'sv-SE': {
    Article: 'Artikel', Audiovisual: 'Audiovisuellt material', BlogPost: 'Blogginlägg',
    Book: 'Bok', BookChapter: 'Bokkapitel', BookSeries: 'Bokserie',
    Collection: 'Samling', Component: 'Komponent', ComputationalNotebook: 'Beräkningsnotatbok',
    DataPaper: 'Datapaper', Dataset: 'Dataset', Dissertation: 'Avhandling',
    Document: 'Dokument', Entry: 'Post', Event: 'Evenemang',
    Figure: 'Figur', Grant: 'Anslag', Image: 'Bild',
    Instrument: 'Instrument', InteractiveResource: 'Interaktiv resurs',
    Journal: 'Tidskrift', JournalArticle: 'Tidskriftsartikel',
    LegalDocument: 'Juridiskt dokument', Manuscript: 'Manuskript', Map: 'Karta',
    Model: 'Modell', Other: 'Övrigt', OutputManagementPlan: 'Datahanteringsplan',
    Patent: 'Patent', PeerReview: 'Kollegial granskning', Performance: 'Föreställning',
    PersonalCommunication: 'Personlig kommunikation', PhysicalObject: 'Fysiskt objekt',
    Poster: 'Poster', Presentation: 'Presentation', Preprint: 'Preprint',
    ProceedingsArticle: 'Konferensartikel', Report: 'Rapport', Service: 'Tjänst',
    Software: 'Programvara', Sound: 'Ljud', Standard: 'Standard',
    StudyRegistration: 'Studieregistrering', WebPage: 'Webbsida',
  },
  'zh-CN': {
    Article: '文章', Audiovisual: '视听资料', BlogPost: '博客文章',
    Book: '图书', BookChapter: '书目章节', BookSeries: '丛书',
    Collection: '集合', Component: '组件', ComputationalNotebook: '计算笔记本',
    DataPaper: '数据论文', Dataset: '数据集', Dissertation: '学位论文',
    Document: '文档', Entry: '条目', Event: '活动',
    Figure: '图表', Grant: '科研经费', Image: '图像',
    Instrument: '仪器', InteractiveResource: '交互式资源',
    Journal: '期刊', JournalArticle: '期刊文章',
    LegalDocument: '法律文件', Manuscript: '手稿', Map: '地图',
    Model: '模型', Other: '其他', OutputManagementPlan: '数据管理计划',
    Patent: '专利', PeerReview: '同行评审', Performance: '表演',
    PersonalCommunication: '个人通信', PhysicalObject: '实体对象',
    Poster: '海报', Presentation: '演示文稿', Preprint: '预印本',
    ProceedingsArticle: '会议论文', Report: '报告', Service: '服务',
    Software: '软件', Sound: '音频', Standard: '标准',
    StudyRegistration: '研究注册', WebPage: '网页',
  },
}

/**
 * Return the display label for a resource type in the given locale.
 *
 * Looks the type up in TYPE_LABELS[locale]. When the locale or the type has
 * no usable entry (missing, or an empty string), the label is derived from
 * the identifier itself by inserting a space before every capital letter,
 * e.g. 'JournalArticle' → 'Journal Article'.
 *
 * @param {string} type - Resource type identifier such as 'JournalArticle'.
 * @param {string} locale - Locale tag used as the TYPE_LABELS key, e.g. 'de-DE'.
 * @returns {string} The localized label, the derived fallback, or '' for a falsy type.
 */
export function typeLabel(type, locale) {
  if (!type) return ''
  // Own-property checks keep inherited Object.prototype members such as
  // 'constructor' or 'toString' from being returned as if they were labels.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key)
  const labels = hasOwn(TYPE_LABELS, locale) ? TYPE_LABELS[locale] : undefined
  const label = labels && hasOwn(labels, type) ? labels[type] : undefined
  if (typeof label === 'string' && label !== '') return label
  return String(type).replace(/([A-Z])/g, ' $1').trim()
}

/**
 * Format a partial date string (`YYYY`, `YYYY-MM` or `YYYY-MM-DD`) for display.
 *
 * Only the leading numeric `year[-month[-day]]` part is used, so a trailing
 * time component (`2021-03-04T12:00:00Z`) is ignored instead of producing an
 * invalid date. Input that does not start with a year is returned unchanged.
 *
 * @param {string} dateStr - Date string to format.
 * @param {string} [locale] - Locale passed to Intl.DateTimeFormat.
 * @returns {string} Year only, "month year", or a full long date, depending on
 *   how many components were given; '' for falsy input.
 */
export function formatDate(dateStr, locale) {
  if (!dateStr) return ''
  const text = String(dateStr).trim()
  const match = /^(\d+)(?:-(\d+)(?:-(\d+))?)?/.exec(text)
  if (!match) return text

  const year = Number(match[1])
  if (match[2] === undefined) return String(year)

  const hasDay = match[3] !== undefined
  // setUTCFullYear takes the year literally, whereas the Date constructor
  // remaps years 0–99 to 1900–1999.
  const date = new Date(0)
  date.setUTCFullYear(year, Number(match[2]) - 1, hasDay ? Number(match[3]) : 1)
  // Out-of-range components would make format() throw a RangeError.
  if (Number.isNaN(date.getTime())) return text

  // Build and format in UTC so the calendar day never depends on the host time zone.
  const options = { year: 'numeric', month: 'long', timeZone: 'UTC' }
  if (hasDay) options.day = 'numeric'
  return new Intl.DateTimeFormat(locale, options).format(date)
}

/**
 * Return the human-readable name of a language code, written in `locale`.
 *
 * Best effort: if Intl.DisplayNames cannot be constructed for the locale or
 * rejects the code, the raw `lang` value is returned unchanged.
 *
 * @param {string} lang - Language code to look up (e.g. 'en').
 * @param {string} locale - Locale in which the name should be written.
 * @returns {string} The display name, the raw code on failure, or '' for falsy input.
 */
export function languageLabel(lang, locale) {
  if (!lang) return ''
  let displayName
  try {
    const names = new Intl.DisplayNames([locale], { type: 'language' })
    displayName = names.of(lang)
  } catch {
    // Invalid locale or malformed language code: show the code as-is.
    return lang
  }
  return displayName ?? lang
}

/**
 * Map a Creative Commons license identifier to the list of icon names to show.
 *
 * The match is case-insensitive. Identifiers that do not start with "CC"
 * yield no icons. CC0 / "zero" licenses get the generic icon plus the zero
 * icon; all others get the generic icon followed by one icon per clause
 * found in the identifier, in the fixed order BY, SA, NC, ND.
 *
 * @param {string} id - License identifier, e.g. 'CC-BY-NC-4.0'.
 * @returns {string[]} Icon names; empty when the id is falsy or not a CC license.
 */
export function getCCIcons(id) {
  if (!id) return []
  const upper = id.toUpperCase()
  if (!upper.startsWith('CC')) return []

  const isZero = upper.startsWith('CC0') || upper.includes('ZERO')
  if (isZero) {
    return ['fa-brands:creative-commons', 'fa-brands:creative-commons-zero']
  }

  // Clause marker → icon, listed in the order the icons are emitted.
  const clauseIcons = [
    ['-BY', 'fa-brands:creative-commons-by'],
    ['-SA', 'fa-brands:creative-commons-sa'],
    ['-NC', 'fa-brands:creative-commons-nc'],
    ['-ND', 'fa-brands:creative-commons-nd'],
  ]
  const result = ['fa-brands:creative-commons']
  for (const [marker, icon] of clauseIcons) {
    if (upper.includes(marker)) result.push(icon)
  }
  return result
}

// Macro table handed to KaTeX when rendering math: each upright Greek command
// (\upalpha, \upbeta, …) is defined as an alias of the plain command
// (\alpha, \beta, …).
const KATEX_MACROS = Object.fromEntries(
  [
    'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',
    'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
    'nu', 'xi', 'pi', 'rho', 'sigma', 'tau',
    'upsilon', 'phi', 'chi', 'psi', 'omega',
  ].map(letter => [`\\up${letter}`, `\\${letter}`]),
)

// Inline formatting elements that survive sanitizing; every other element is
// dropped while its text content is kept.
const INLINE_TAGS = new Set(['b', 'i', 'strong', 'em', 'sup', 'sub'])

// Matches an HTML-entity-encoded opening or closing tag from the allow-list
// above (e.g. "&lt;sup&gt;" or "&lt;/sup&gt;"), capturing the tag name with its
// optional leading slash so that a '<$1>' replacement restores the real tag.
// It must match the *encoded* form: matching a literal "<sup>" would make that
// replacement a no-op.
const ENCODED_TAG_RE = /&lt;(\/?(?:b|i|em|strong|sup|sub))&gt;/gi

// Reduce an HTML fragment to escaped text plus the allow-listed inline tags.
//
// The input is first passed through ENCODED_TAG_RE (intended to turn
// entity-encoded inline tags such as &lt;sup&gt; into real markup), then parsed
// with DOMParser and re-serialized by hand: text nodes are HTML-escaped,
// elements in INLINE_TAGS are re-emitted without attributes, any other
// element contributes only its children, and all other node kinds are dropped.
// Returns '' for falsy input.
function sanitizePlain(text) {
  if (!text) return ''

  const ELEMENT_NODE = 1
  const TEXT_NODE = 3

  const escapeText = value =>
    value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')

  const serializeAll = nodes => {
    let out = ''
    for (const child of Array.from(nodes)) out += serialize(child)
    return out
  }

  const serialize = node => {
    switch (node.nodeType) {
      case TEXT_NODE:
        return escapeText(node.textContent)
      case ELEMENT_NODE: {
        const name = node.tagName.toLowerCase()
        const inner = serializeAll(node.childNodes)
        // Allow-listed tags are rebuilt from the bare name, so attributes never survive.
        return INLINE_TAGS.has(name) ? `<${name}>${inner}</${name}>` : inner
      }
      default:
        return ''
    }
  }

  const markup = text.replace(ENCODED_TAG_RE, '<$1>')
  const parsed = new DOMParser().parseFromString(markup, 'text/html')
  return serializeAll(parsed.body.childNodes)
}

/**
 * Render abstract-style text that mixes HTML fragments and TeX math into HTML.
 *
 * Steps:
 *  1. Clean up: drop lines whose trimmed length is 1 or 2 characters (blank
 *     lines are kept), join the rest with spaces, collapse whitespace runs,
 *     trim, and strip a leading "bstract " / "A bstract " remnant.
 *  2. Rewrite malformed numeric entities of the form `&123;` to `&#123;`.
 *  3. Scan for math: `$$…$$` and `$…$` spans are rendered with KaTeX (both
 *     with displayMode: false); everything else goes through sanitizePlain.
 *
 * A `$` that has no closing `$` is treated as literal text (e.g. a currency
 * sign) rather than as the start of a math span.
 *
 * @param {string} text - Raw text to render.
 * @returns {string} HTML string; '' for falsy input.
 */
export function renderContent(text) {
  if (!text) return ''
  // Collapse JATS XML whitespace and strip MathML fallback lines of 1–2 characters
  const cleaned = text
    .split('\n')
    .filter(line => { const t = line.trim(); return t.length === 0 || t.length > 2 })
    .join(' ')
    .replace(/\s{2,}/g, ' ')
    .trim()
    .replace(/^(?:A\s+)?bstract\s+/i, '')
  const s = cleaned.replace(/&(\d+);/g, '&#$1;')

  const renderMath = tex =>
    katex.renderToString(tex, { displayMode: false, throwOnError: false, macros: KATEX_MACROS })

  const parts = []
  let i = 0
  while (i < s.length) {
    if (s[i] === '$') {
      // Prefer the two-character delimiter when it has a matching close.
      if (s[i + 1] === '$') {
        const end = s.indexOf('$$', i + 2)
        if (end !== -1) {
          parts.push(renderMath(s.slice(i + 2, end)))
          i = end + 2
          continue
        }
      }
      const end = s.indexOf('$', i + 1)
      if (end !== -1) {
        parts.push(renderMath(s.slice(i + 1, end)))
        i = end + 1
        continue
      }
    }
    // Plain text runs up to the next '$'. When the current character is an
    // unmatched '$', start the search after it: searching from `i` would find
    // the same '$' again, produce an empty chunk and never advance.
    const next = s.indexOf('$', s[i] === '$' ? i + 1 : i)
    const stop = next === -1 ? s.length : next
    parts.push(sanitizePlain(s.slice(i, stop)))
    i = stop
  }
  return parts.join('')
}