use reqwest::blocking::Client as HttpClient;
use reqwest::Url;
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::time::Duration;
use rand::Rng;
/// Base URL that [`ColClient`] constructors parse and use as the root for every API path.
const DEFAULT_BASE_URL: &str = "https://api.checklistbank.org";
/// Error type returned by the client functions in this file.
///
/// The wrapping variants are produced through `?` conversions (`#[from]`).
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// Error raised by `reqwest` (building the client, sending a request,
/// a non-success status, or decoding a response body).
#[error("http error: {0}")]
Http(#[from] reqwest::Error),
/// A URL could not be parsed or joined.
#[error("url parse error: {0}")]
Url(#[from] url::ParseError),
/// Wrapped `std::io::Error`.
#[error("io error: {0}")]
Io(#[from] std::io::Error),
/// Wrapped `serde_json::Error` (JSON parsing or conversion).
#[error("serialization error: {0}")]
Serde(#[from] serde_json::Error),
/// No dataset titled exactly "Catalogue of Life" was found while looking
/// for the latest release.
#[error("no matching Catalogue of Life release found")]
NoRelease,
}
/// Blocking HTTP client bound to a single dataset key.
#[derive(Clone, Debug)]
pub struct ColClient {
// Underlying blocking `reqwest` client used for every request.
http: HttpClient,
// Root URL that request paths are joined onto.
base_url: Url,
/// Dataset key interpolated into the `/dataset/{key}/...` request paths.
pub dataset_key: i32,
}
impl ColClient {
/// Creates a client that targets the dataset identified by `dataset_key`.
///
/// The HTTP client is configured with a fixed user agent and a 30 second
/// timeout; no request is sent.
///
/// # Errors
/// Fails if the HTTP client cannot be built or the base URL cannot be parsed.
pub fn with_dataset_key(dataset_key: i32) -> Result<Self, Error> {
    let builder = HttpClient::builder()
        .timeout(Duration::from_secs(30))
        .user_agent("colonizer/0.1 (+https://github.com/oolonek/colonizer)");
    let http = builder.build()?;
    let base_url = Url::parse(DEFAULT_BASE_URL)?;
    let client = Self {
        http,
        base_url,
        dataset_key,
    };
    Ok(client)
}
/// Creates a client bound to the dataset returned by `discover_latest_release`.
///
/// Unlike `with_dataset_key`, this performs a network request to find the
/// dataset key.
///
/// # Errors
/// Fails if the HTTP client cannot be built, the base URL cannot be parsed,
/// or the release lookup fails.
pub fn from_latest() -> Result<Self, Error> {
    let builder = HttpClient::builder()
        .timeout(Duration::from_secs(30))
        .user_agent("colonizer/0.1 (+https://github.com/oolonek/colonizer)");
    let http = builder.build()?;
    let base_url = Url::parse(DEFAULT_BASE_URL)?;
    let dataset_key = discover_latest_release(&http, &base_url)?.key;
    Ok(Self {
        http,
        base_url,
        dataset_key,
    })
}
fn url(&self, path: &str) -> Result<Url, Error> {
Ok(self.base_url.join(path)?)
}
/// Looks up the id of a name usage matching `name` exactly.
///
/// Sends an `EXACT` search limited to 50 hits; when `rank` is given, both
/// `minRank` and `maxRank` are set to it. The first hit whose usage status is
/// `"accepted"` wins; if none is accepted, the first hit is used. Returns
/// `Ok(None)` when the search yields no hits.
///
/// # Errors
/// Fails on URL construction, transport, non-success status, or decode errors.
pub fn id_for_name(&self, name: &str, rank: Option<Rank>) -> Result<Option<String>, Error> {
    let path = format!("/dataset/{}/nameusage/search", self.dataset_key);
    let mut url = self.url(&path)?;
    {
        let mut query = url.query_pairs_mut();
        query
            .append_pair("q", name)
            .append_pair("type", "EXACT")
            .append_pair("limit", "50");
        if let Some(wanted) = rank {
            let token = wanted.as_api_param();
            query
                .append_pair("minRank", &token)
                .append_pair("maxRank", &token);
        }
    }
    let page: Page<NameUsageHit> = self.http.get(url).send()?.error_for_status()?.json()?;
    let hits = &page.result;
    // Prefer the first accepted usage, otherwise fall back to the first hit.
    let accepted = hits.iter().find(|hit| {
        hit.usage.as_ref().and_then(|u| u.status.as_deref()) == Some("accepted")
    });
    let chosen = accepted.or_else(|| hits.first());
    Ok(chosen.map(|hit| hit.id.clone()))
}
pub fn list_by_rank(
&self,
rank: Rank,
max: Option<usize>,
) -> Result<Vec<SimpleUsage>, Error> {
let mut collected: Vec<SimpleUsage> = Vec::new();
let mut offset: usize = 0;
let limit: usize = 1000; loop {
let mut url = self.url(&format!("/dataset/{}/nameusage/search", self.dataset_key))?;
{
let mut qp = url.query_pairs_mut();
let rp = rank.as_api_param();
qp.append_pair("minRank", &rp);
qp.append_pair("maxRank", &rp);
qp.append_pair("limit", &limit.to_string());
qp.append_pair("offset", &offset.to_string());
}
let resp: Page<NameUsageHit> = self.http.get(url).send()?.error_for_status()?.json()?;
if resp.result.is_empty() {
break;
}
for hit in resp.result.into_iter() {
let label = hit
.usage
.as_ref()
.and_then(|u| u.label.clone())
.or_else(|| hit.label.clone())
.unwrap_or_else(|| hit.id.clone());
let rank = hit
.usage
.as_ref()
.and_then(|u| u.name.as_ref().and_then(|n| n.rank.clone()))
.or_else(|| hit.rank.clone());
collected.push(SimpleUsage {
id: hit.id,
label,
rank,
});
if let Some(max_total) = max {
if collected.len() >= max_total {
return Ok(collected);
}
}
}
offset += limit;
if let Some(max_total) = max {
if collected.len() >= max_total {
break;
}
}
if let Some(total) = resp.total {
if offset >= total as usize {
break;
}
}
}
Ok(collected)
}
}
fn discover_latest_release(http: &HttpClient, base_url: &Url) -> Result<Dataset, Error> {
let mut url = base_url.join("/dataset")?;
{
let mut qp = url.query_pairs_mut();
qp.append_pair("origin", "RELEASE");
qp.append_pair("type", "TAXONOMIC");
qp.append_pair("limit", "100");
qp.append_pair("q", "Catalogue of Life");
}
let page: Page<Dataset> = http.get(url).send()?.error_for_status()?.json()?;
let mut candidates: Vec<Dataset> = page
.result
.into_iter()
.filter(|d| d.title == "Catalogue of Life")
.collect();
if candidates.is_empty() {
return Err(Error::NoRelease);
}
candidates.sort_by(|a, b| match (a.created.as_ref(), b.created.as_ref()) {
(Some(ta), Some(tb)) => ta.cmp(tb),
_ => Ordering::Equal,
});
Ok(candidates.pop().unwrap())
}
impl ColClient {
    /// Fetches the metadata record of the dataset this client is bound to.
    ///
    /// # Errors
    /// Fails on URL construction, transport, non-success status, or decode errors.
    pub fn dataset_meta(&self) -> Result<Dataset, Error> {
        let path = format!("/dataset/{}", self.dataset_key);
        let response = self.http.get(self.url(&path)?).send()?.error_for_status()?;
        Ok(response.json::<Dataset>()?)
    }
}
/// Returns an empty vector; referenced by name as a serde default.
fn empty_vec<T>() -> Vec<T> {
    Vec::default()
}
/// One page of a paginated API response, generic over the item type.
#[derive(Debug, Deserialize, Serialize)]
pub struct Page<T> {
/// Items on this page; an empty vector when the key is absent.
#[serde(default = "empty_vec")]
pub result: Vec<T>,
/// Overall item count, when present; the paging loops in this file stop
/// once their offset reaches it.
#[serde(default)]
pub total: Option<i64>,
/// `limit` value from the response, when present.
#[serde(default)]
pub limit: Option<i32>,
/// `offset` value from the response, when present.
#[serde(default)]
pub offset: Option<i32>,
}
/// Taxonomic rank.
///
/// Each named variant corresponds to one upper-case API token (see
/// `as_api_param` / `from_token`); any other token is kept in `Unknown`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Rank {
Superdomain,
Domain,
Subdomain,
Empire,
Realm,
Superkingdom,
Kingdom,
Subkingdom,
Infrakingdom,
Superphylum,
Phylum,
Division, Subphylum,
Infraphylum,
Parvphylum,
Superclass,
Class,
Subclass,
Infraclass,
Superorder,
Order,
Suborder,
Infraorder,
Parvorder,
Superfamily,
Family,
Subfamily,
Infrafamily,
Tribe,
Subtribe,
Supergenus,
Genus,
Subgenus,
Infragenus,
SpeciesAggregate,
Species,
Subspecies,
InfraspecificName,
Variety,
Subvariety,
Form,
Subform,
Pathovar,
Biovar,
Chemovar,
Strain,
Cultivar,
CultivarGroup,
Grex,
InfragenericName,
SupragenericName,
Other,
Unranked,
/// A token that matched none of the named variants; holds the token text.
Unknown(String),
}
impl Rank {
/// Canonicalizes a rank string into token form: surrounding whitespace is
/// trimmed, spaces and hyphens become underscores, and ASCII letters are
/// upper-cased.
fn normalize_token(s: &str) -> String {
    s.trim()
        .chars()
        .map(|c| match c {
            ' ' | '-' => '_',
            other => other.to_ascii_uppercase(),
        })
        .collect()
}
/// Returns the token used for this rank in API query parameters.
///
/// Named variants map to a fixed upper-case token; `Unknown` returns its
/// stored token unchanged.
pub fn as_api_param(&self) -> String {
use Rank::*;
match self {
Superdomain => "SUPERDOMAIN",
Domain => "DOMAIN",
Subdomain => "SUBDOMAIN",
Empire => "EMPIRE",
Realm => "REALM",
Superkingdom => "SUPERKINGDOM",
Kingdom => "KINGDOM",
Subkingdom => "SUBKINGDOM",
Infrakingdom => "INFRAKINGDOM",
Superphylum => "SUPERPHYLUM",
Phylum => "PHYLUM",
Division => "DIVISION",
Subphylum => "SUBPHYLUM",
Infraphylum => "INFRAPHYLUM",
Parvphylum => "PARVPHYLUM",
Superclass => "SUPERCLASS",
Class => "CLASS",
Subclass => "SUBCLASS",
Infraclass => "INFRACLASS",
Superorder => "SUPERORDER",
Order => "ORDER",
Suborder => "SUBORDER",
Infraorder => "INFRAORDER",
Parvorder => "PARVORDER",
Superfamily => "SUPERFAMILY",
Family => "FAMILY",
Subfamily => "SUBFAMILY",
Infrafamily => "INFRAFAMILY",
Tribe => "TRIBE",
Subtribe => "SUBTRIBE",
Supergenus => "SUPERGENUS",
Genus => "GENUS",
Subgenus => "SUBGENUS",
Infragenus => "INFRAGENUS",
SpeciesAggregate => "SPECIES_AGGREGATE",
Species => "SPECIES",
Subspecies => "SUBSPECIES",
InfraspecificName => "INFRASPECIFIC_NAME",
Variety => "VARIETY",
Subvariety => "SUBVARIETY",
Form => "FORM",
Subform => "SUBFORM",
Pathovar => "PATHOVAR",
Biovar => "BIOVAR",
Chemovar => "CHEMOVAR",
Strain => "STRAIN",
Cultivar => "CULTIVAR",
CultivarGroup => "CULTIVAR_GROUP",
Grex => "GREX",
InfragenericName => "INFRAGENERIC_NAME",
SupragenericName => "SUPRAGENERIC_NAME",
Other => "OTHER",
Unranked => "UNRANKED",
Unknown(tok) => tok,
}
.to_string()
}
/// Maps a token to its rank.
///
/// Matching is exact and case-sensitive against the upper-case tokens, so
/// callers are expected to pass a token already processed by
/// `normalize_token`. Anything unmatched becomes `Unknown` holding the token.
fn from_token(tok: &str) -> Self {
use Rank::*;
match tok {
"SUPERDOMAIN" => Superdomain,
"DOMAIN" => Domain,
"SUBDOMAIN" => Subdomain,
"EMPIRE" => Empire,
"REALM" => Realm,
"SUPERKINGDOM" => Superkingdom,
"KINGDOM" => Kingdom,
"SUBKINGDOM" => Subkingdom,
"INFRAKINGDOM" => Infrakingdom,
"SUPERPHYLUM" => Superphylum,
"PHYLUM" => Phylum,
"DIVISION" => Division,
"SUBPHYLUM" => Subphylum,
"INFRAPHYLUM" => Infraphylum,
"PARVPHYLUM" => Parvphylum,
"SUPERCLASS" => Superclass,
"CLASS" => Class,
"SUBCLASS" => Subclass,
"INFRACLASS" => Infraclass,
"SUPERORDER" => Superorder,
"ORDER" => Order,
"SUBORDER" => Suborder,
"INFRAORDER" => Infraorder,
"PARVORDER" => Parvorder,
"SUPERFAMILY" => Superfamily,
"FAMILY" => Family,
"SUBFAMILY" => Subfamily,
"INFRAFAMILY" => Infrafamily,
"TRIBE" => Tribe,
"SUBTRIBE" => Subtribe,
"SUPERGENUS" => Supergenus,
"GENUS" => Genus,
"SUBGENUS" => Subgenus,
"INFRAGENUS" => Infragenus,
"SPECIES_AGGREGATE" => SpeciesAggregate,
"SPECIES" => Species,
"SUBSPECIES" => Subspecies,
"INFRASPECIFIC_NAME" => InfraspecificName,
"VARIETY" => Variety,
"SUBVARIETY" => Subvariety,
"FORM" => Form,
"SUBFORM" => Subform,
"PATHOVAR" => Pathovar,
"BIOVAR" => Biovar,
"CHEMOVAR" => Chemovar,
"STRAIN" => Strain,
"CULTIVAR" => Cultivar,
"CULTIVAR_GROUP" => CultivarGroup,
"GREX" => Grex,
"INFRAGENERIC_NAME" => InfragenericName,
"SUPRAGENERIC_NAME" => SupragenericName,
"OTHER" => Other,
"UNRANKED" => Unranked,
x => Unknown(x.to_string()),
}
}
/// Returns the lower-case, space-separated name of this rank.
///
/// `Unknown` always yields `"unknown"` here, regardless of the token it
/// holds; the `Display` impl handles `Unknown` itself before calling this.
fn human(&self) -> &'static str {
use Rank::*;
match self {
Superdomain => "superdomain",
Domain => "domain",
Subdomain => "subdomain",
Empire => "empire",
Realm => "realm",
Superkingdom => "superkingdom",
Kingdom => "kingdom",
Subkingdom => "subkingdom",
Infrakingdom => "infrakingdom",
Superphylum => "superphylum",
Phylum => "phylum",
Division => "division",
Subphylum => "subphylum",
Infraphylum => "infraphylum",
Parvphylum => "parvphylum",
Superclass => "superclass",
Class => "class",
Subclass => "subclass",
Infraclass => "infraclass",
Superorder => "superorder",
Order => "order",
Suborder => "suborder",
Infraorder => "infraorder",
Parvorder => "parvorder",
Superfamily => "superfamily",
Family => "family",
Subfamily => "subfamily",
Infrafamily => "infrafamily",
Tribe => "tribe",
Subtribe => "subtribe",
Supergenus => "supergenus",
Genus => "genus",
Subgenus => "subgenus",
Infragenus => "infragenus",
SpeciesAggregate => "species aggregate",
Species => "species",
Subspecies => "subspecies",
InfraspecificName => "infraspecific name",
Variety => "variety",
Subvariety => "subvariety",
Form => "form",
Subform => "subform",
Pathovar => "pathovar",
Biovar => "biovar",
Chemovar => "chemovar",
Strain => "strain",
Cultivar => "cultivar",
CultivarGroup => "cultivar group",
Grex => "grex",
InfragenericName => "infrageneric name",
SupragenericName => "suprageneric name",
Other => "other",
Unranked => "unranked",
Unknown(_) => "unknown",
}
}
}
/// Formats a rank as lower-case text: the human-readable name for named
/// variants, or the ASCII-lower-cased token for `Unknown`.
impl std::fmt::Display for Rank {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Rank::Unknown(tok) = self {
            return write!(f, "{}", tok.to_ascii_lowercase());
        }
        write!(f, "{}", self.human())
    }
}
impl std::str::FromStr for Rank {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let tok = Rank::normalize_token(s);
Ok(Rank::from_token(&tok))
}
}
impl Serialize for Rank {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_str(&self.to_string())
}
}
impl<'de> Deserialize<'de> for Rank {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Ok(s.parse().unwrap_or_else(|_| Rank::Unknown(Rank::normalize_token(&s))))
}
}
/// Dataset record as decoded from the `/dataset` endpoints.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct Dataset {
/// Dataset key; used as the `{key}` segment in `/dataset/{key}/...` paths.
pub key: i32,
/// Dataset title; compared against "Catalogue of Life" during release discovery.
pub title: String,
#[serde(default)]
pub alias: Option<String>,
#[serde(default)]
pub origin: Option<String>,
/// Creation timestamp as a string; release discovery orders candidates by it.
#[serde(default)]
pub created: Option<String>,
}
/// One entry of a `nameusage/search` result page.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct NameUsageHit {
/// Identifier of the hit; returned by `id_for_name`.
pub id: String,
/// Label of the hit; used as a fallback when the nested usage has none.
#[serde(default)]
pub label: Option<String>,
/// Rank of the hit; used as a fallback when the nested usage's name has none.
#[serde(default)]
pub rank: Option<Rank>,
#[serde(default)]
pub classification: Option<Vec<ClassificationNode>>,
/// Nested usage record, when present.
#[serde(default)]
pub usage: Option<Usage>,
}
/// One node of a classification list, as returned by `classification_of`
/// and optionally embedded in search hits.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct ClassificationNode {
pub id: String,
pub name: String,
#[serde(default)]
pub rank: Option<Rank>,
#[serde(default)]
pub label: Option<String>,
}
/// Name usage record, decoded from the `taxon/{id}` endpoint and nested in
/// search hits.
///
/// Field names carry no serde rename, so they are used verbatim as JSON
/// keys (hence the camelCase `parentId`).
#[allow(non_snake_case)]
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct Usage {
pub id: String,
/// Status string; `id_for_name` prefers hits whose status is `"accepted"`.
#[serde(default)]
pub status: Option<String>,
#[serde(default)]
pub parentId: Option<String>,
#[serde(default)]
pub label: Option<String>,
#[serde(default)]
pub name: Option<Name>,
}
/// Name details nested inside a [`Usage`].
///
/// Field names carry no serde rename, so they are used verbatim as JSON
/// keys (hence the camelCase `scientificName`).
#[allow(non_snake_case)]
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct Name {
#[serde(default)]
pub scientificName: Option<String>,
#[serde(default)]
pub authorship: Option<String>,
#[serde(default)]
pub rank: Option<Rank>,
}
/// Flattened summary of a search hit, as produced by `list_by_rank`.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct SimpleUsage {
/// Identifier of the hit.
pub id: String,
/// Display label; `list_by_rank` falls back to the id when no label exists.
pub label: String,
/// Rank, when one was available on the hit.
#[serde(default)]
pub rank: Option<Rank>,
}
/// One node of the dataset tree, as returned by `roots` and `children_of`.
///
/// Field names carry no serde rename, so they are used verbatim as JSON
/// keys (hence the camelCase names).
#[allow(non_snake_case)]
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct TreeNode {
pub datasetKey: Option<i32>,
pub id: String,
#[serde(default)]
pub parentId: Option<String>,
#[serde(default)]
pub rank: Option<Rank>,
#[serde(default)]
pub status: Option<String>,
#[serde(default)]
pub count: Option<i64>,
#[serde(default)]
pub childCount: Option<i64>,
#[serde(default)]
pub name: Option<String>,
#[serde(default)]
pub authorship: Option<String>,
#[serde(default)]
pub labelHtml: Option<String>,
}
/// One entry returned by `suggest`.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct SuggestItem {
/// Value of the response's `match` key; `suggest` renames that key to
/// `match_` before decoding into this struct.
pub match_: String,
#[serde(default)]
pub context: Option<String>,
#[serde(rename = "usageId")]
pub usage_id: String,
#[serde(rename = "nameId")]
pub name_id: String,
#[serde(default)]
pub rank: Option<Rank>,
#[serde(default)]
pub status: Option<String>,
#[serde(rename = "acceptedUsageId")]
#[serde(default)]
pub accepted_usage_id: Option<String>,
#[serde(rename = "acceptedName")]
#[serde(default)]
pub accepted_name: Option<String>,
#[serde(default)]
pub suggestion: Option<String>,
}
/// Vernacular name entry as returned by `vernacular_of` for a single taxon.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct VernacularName {
pub id: i64,
pub name: String,
#[serde(default)]
pub language: Option<String>,
}
/// Vernacular name entry from the dataset-wide `/vernacular` listing, which
/// also carries the id of the taxon it belongs to.
#[allow(non_snake_case)]
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct DatasetVernacular {
pub id: i64,
pub name: String,
#[serde(default)]
pub language: Option<String>,
/// Decoded from the JSON key `taxonID`.
#[serde(rename = "taxonID")]
pub taxon_id: String,
}
impl ColClient {
pub fn roots(&self, max: Option<usize>) -> Result<Vec<TreeNode>, Error> {
let mut items = Vec::new();
let mut offset = 0usize;
let limit = 1000usize;
loop {
let mut url = self.url(&format!("/dataset/{}/tree", self.dataset_key))?;
{
let mut qp = url.query_pairs_mut();
qp.append_pair("limit", &limit.to_string());
qp.append_pair("offset", &offset.to_string());
}
let page: Page<TreeNode> = self.http.get(url).send()?.error_for_status()?.json()?;
if page.result.is_empty() { break; }
items.extend(page.result.into_iter());
if let Some(m) = max { if items.len() >= m { break; } }
offset += limit;
if let Some(total) = page.total { if offset >= total as usize { break; } }
}
if let Some(m) = max { items.truncate(m); }
Ok(items)
}
pub fn children_of(&self, taxon_id: &str, max: Option<usize>) -> Result<Vec<TreeNode>, Error> {
let mut items = Vec::new();
let mut offset = 0usize;
let limit = 1000usize;
loop {
let mut url = self.url(&format!("/dataset/{}/tree/{}/children", self.dataset_key, taxon_id))?;
{
let mut qp = url.query_pairs_mut();
qp.append_pair("limit", &limit.to_string());
qp.append_pair("offset", &offset.to_string());
}
let page: Page<TreeNode> = self.http.get(url).send()?.error_for_status()?.json()?;
if page.result.is_empty() { break; }
items.extend(page.result.into_iter());
if let Some(m) = max { if items.len() >= m { break; } }
offset += limit;
if let Some(total) = page.total { if offset >= total as usize { break; } }
}
if let Some(m) = max { items.truncate(m); }
Ok(items)
}
/// Fetches the classification list for `taxon_id`.
///
/// # Errors
/// Fails on URL construction, transport, non-success status, or decode errors.
pub fn classification_of(&self, taxon_id: &str) -> Result<Vec<ClassificationNode>, Error> {
    let path = format!("/dataset/{}/taxon/{}/classification", self.dataset_key, taxon_id);
    let response = self.http.get(self.url(&path)?).send()?.error_for_status()?;
    Ok(response.json::<Vec<ClassificationNode>>()?)
}
/// Fetches the vernacular names recorded for `taxon_id`.
///
/// # Errors
/// Fails on URL construction, transport, non-success status, or decode errors.
pub fn vernacular_of(&self, taxon_id: &str) -> Result<Vec<VernacularName>, Error> {
    let path = format!("/dataset/{}/taxon/{}/vernacular", self.dataset_key, taxon_id);
    let response = self.http.get(self.url(&path)?).send()?.error_for_status()?;
    Ok(response.json::<Vec<VernacularName>>()?)
}
/// Queries the `nameusage/suggest` endpoint for `q`, asking for at most
/// `limit` entries.
///
/// The response is parsed as a JSON array; in every object element the key
/// `match` is renamed to `match_` so it lines up with [`SuggestItem`].
///
/// # Errors
/// Fails on URL construction, transport, non-success status, or JSON errors.
pub fn suggest(&self, q: &str, limit: usize) -> Result<Vec<SuggestItem>, Error> {
    let path = format!("/dataset/{}/nameusage/suggest", self.dataset_key);
    let mut url = self.url(&path)?;
    url.query_pairs_mut()
        .append_pair("q", q)
        .append_pair("limit", &limit.to_string());
    let body = self.http.get(url).send()?.error_for_status()?.text()?;
    let mut entries: Vec<serde_json::Value> = serde_json::from_str(&body)?;
    for obj in entries.iter_mut().filter_map(|v| v.as_object_mut()) {
        if let Some(matched) = obj.remove("match") {
            obj.insert("match_".to_string(), matched);
        }
    }
    let wrapped = serde_json::Value::Array(entries);
    Ok(serde_json::from_value::<Vec<SuggestItem>>(wrapped)?)
}
/// Picks a random vernacular name in `language`.
///
/// Shorthand for `random_vernacular_by_language_filtered` with no one-word
/// restriction and up to 12 attempts.
pub fn random_vernacular_by_language(&self, language: &str) -> Result<Option<DatasetVernacular>, Error> {
    let one_word_only = false;
    let max_tries: usize = 12;
    self.random_vernacular_by_language_filtered(language, one_word_only, max_tries)
}
/// Fetches the usage record for `taxon_id`.
///
/// # Errors
/// Fails on URL construction, transport, non-success status (including 404),
/// or decode errors.
pub fn taxon(&self, taxon_id: &str) -> Result<Usage, Error> {
    let path = format!("/dataset/{}/taxon/{}", self.dataset_key, taxon_id);
    let response = self.http.get(self.url(&path)?).send()?.error_for_status()?;
    Ok(response.json::<Usage>()?)
}
/// Fetches the usage record for `taxon_id`, mapping HTTP 404 to `Ok(None)`.
///
/// # Errors
/// Fails on URL construction, transport, any other non-success status, or
/// decode errors.
pub fn taxon_opt(&self, taxon_id: &str) -> Result<Option<Usage>, Error> {
    let path = format!("/dataset/{}/taxon/{}", self.dataset_key, taxon_id);
    let response = self.http.get(self.url(&path)?).send()?;
    match response.status().as_u16() {
        404 => Ok(None),
        _ => {
            let usage = response.error_for_status()?.json::<Usage>()?;
            Ok(Some(usage))
        }
    }
}
pub fn random_vernacular_by_language_filtered(
&self,
language: &str,
one_word_only: bool,
max_tries: usize,
) -> Result<Option<DatasetVernacular>, Error> {
let mut url = self.url(&format!("/dataset/{}/vernacular", self.dataset_key))?;
{
let mut qp = url.query_pairs_mut();
qp.append_pair("language", language);
qp.append_pair("limit", "1");
}
let page: Page<DatasetVernacular> = self.http.get(url).send()?.error_for_status()?.json()?;
let total = page.total.unwrap_or(0);
if total <= 0 { return Ok(None); }
let mut rng = rand::thread_rng();
let mut attempts = 0usize;
while attempts < max_tries {
attempts += 1;
let offset: i64 = rng.gen_range(0..total);
let mut url = self.url(&format!("/dataset/{}/vernacular", self.dataset_key))?;
{
let mut qp = url.query_pairs_mut();
qp.append_pair("language", language);
qp.append_pair("limit", "1");
qp.append_pair("offset", &offset.to_string());
}
let page: Page<DatasetVernacular> = self.http.get(url).send()?.error_for_status()?.json()?;
if let Some(v) = page.result.into_iter().next() {
if one_word_only && !is_one_word(&v.name) { continue; }
if looks_like_usage_id(&v.taxon_id) {
if self.taxon_opt(&v.taxon_id)?.is_some() {
return Ok(Some(v));
}
}
}
}
Ok(None)
}
/// Fetches a Wikipedia page summary for `scientific_name`.
///
/// The title is the name with spaces replaced by underscores. The summary
/// endpoint of the `prefer_lang` Wikipedia is tried first, then the English
/// one. English is requested only once when `prefer_lang` is already `"en"`.
/// A language is skipped when the response is not successful or the
/// `extract` field is missing or blank. Returns `Ok(None)` when no language
/// yields a usable summary.
///
/// In the returned value, `wikipedia_url` is empty when the response has no
/// desktop page URL, and the Wikidata fields are `None` when the response
/// has no `wikibase_item`.
///
/// # Errors
/// Fails when a request cannot be sent or a successful response is not
/// valid JSON.
pub fn wikipedia_summary(&self, scientific_name: &str, prefer_lang: &str) -> Result<Option<WikipediaInfo>, Error> {
    use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
    fn summary_url(lang: &str, title: &str) -> String {
        let enc = utf8_percent_encode(title, NON_ALPHANUMERIC).to_string();
        format!("https://{}.wikipedia.org/api/rest_v1/page/summary/{}?redirect=true", lang, enc)
    }

    let title = scientific_name.replace(' ', "_");
    // Avoid issuing the identical request twice when English is preferred.
    let candidates = [prefer_lang, "en"];
    let langs = if prefer_lang == "en" {
        &candidates[..1]
    } else {
        &candidates[..]
    };

    for &lang in langs {
        let url = summary_url(lang, &title);
        let resp = self.http.get(url).header("accept", "application/json").send()?;
        // Covers 404 as well as every other non-2xx status.
        if !resp.status().is_success() {
            continue;
        }
        let v: serde_json::Value = resp.json()?;
        let extract = v
            .get("extract")
            .and_then(|x| x.as_str())
            .map(|s| s.trim().to_string())
            .unwrap_or_default();
        if extract.is_empty() {
            continue;
        }
        let wikipedia_url = v
            .get("content_urls")
            .and_then(|cu| cu.get("desktop"))
            .and_then(|d| d.get("page"))
            .and_then(|p| p.as_str())
            .unwrap_or("")
            .to_string();
        let wikidata_qid = v
            .get("wikibase_item")
            .and_then(|x| x.as_str())
            .map(|s| s.to_string());
        let wikidata_url = wikidata_qid
            .as_ref()
            .map(|qid| format!("https://www.wikidata.org/wiki/{}", qid));
        return Ok(Some(WikipediaInfo {
            lang: lang.to_string(),
            title: v.get("title").and_then(|t| t.as_str()).unwrap_or(&title).to_string(),
            extract,
            wikipedia_url,
            wikidata_qid,
            wikidata_url,
        }));
    }
    Ok(None)
}
}
/// Returns `true` when `s` contains exactly one whitespace-separated token.
fn is_one_word(s: &str) -> bool {
    // Looking at no more than two tokens is enough to tell "one" from "many".
    s.split_whitespace().take(2).count() == 1
}
/// Returns `true` when `s` is 4 to 6 bytes long and consists solely of ASCII
/// upper-case letters and ASCII digits.
fn looks_like_usage_id(s: &str) -> bool {
    matches!(s.len(), 4..=6)
        && s.bytes().all(|b| b.is_ascii_uppercase() || b.is_ascii_digit())
}
/// Summary information produced by `wikipedia_summary`.
#[derive(Debug, Clone)]
pub struct WikipediaInfo {
/// Language code of the Wikipedia edition the summary came from.
pub lang: String,
/// `title` from the response, or the requested title when it is absent.
pub title: String,
/// Trimmed `extract` text from the response; never empty.
pub extract: String,
/// Desktop page URL from the response, or an empty string when absent.
pub wikipedia_url: String,
/// `wikibase_item` from the response, when present.
pub wikidata_qid: Option<String>,
/// `https://www.wikidata.org/wiki/{qid}` built from `wikidata_qid`, when present.
pub wikidata_url: Option<String>,
}