#![allow(clippy::upper_case_acronyms)]
use crate::error::{Error, ErrorKind};
use pest::iterators::Pair;
use pest::Parser;
use std::collections::HashSet;
use std::fmt::{Display, Formatter};
use std::hash::{Hash, Hasher};
use std::str::FromStr;
/// A parsed language tag in one of the three shapes the parser distinguishes.
///
/// `PartialEq` and `Hash` are implemented by hand elsewhere in this file so
/// that comparisons ignore ASCII case; only `Eq` is derived here.
#[derive(Clone, Debug, Eq)]
pub enum LanguageTag {
/// A regular tag: a language plus optional script, region, variants,
/// extensions and private-use sub-tags.
Tag(Tag),
/// A tag made up solely of private-use sub-tags. The sub-tags are stored
/// without the leading `x`; `Display` writes them back with an `x-` prefix.
PrivateUse(Vec<String>),
/// A grandfathered tag, stored as the exact text matched by the parser.
Grandfathered(String),
}
/// The components of a regular (non private-use, non grandfathered) language tag.
///
/// All components keep the casing found in the parsed input; comparison and
/// hashing are implemented by hand and ignore ASCII case.
#[derive(Clone, Debug, Eq)]
pub struct Tag {
// Primary language sub-tag; always present (first element produced by the parser).
language: String,
// Optional script sub-tag.
script: Option<String>,
// Optional region sub-tag.
region: Option<String>,
// Variant sub-tags in the order they appeared; duplicates are rejected while parsing.
variants: Vec<String>,
// Extensions in the order they appeared, one entry per singleton.
extensions: Vec<Extension>,
// Private-use sub-tags that trail the tag, stored without the leading `x`.
private_use: Vec<String>,
}
/// A single extension inside a [`Tag`]: a one-character singleton followed by
/// its sub-tags.
#[derive(Clone, Debug, Eq)]
pub struct Extension {
// The first character of the extension text as it appeared in the input.
singleton: char,
// The `-`-separated sub-tags that follow the singleton, in input order.
sub_tags: Vec<String>,
}
// Parser type derived from the grammar file named in the `grammar` attribute.
// The `Rule` values matched throughout this file (`language_tag`, `lang_tag`,
// `script`, `region`, ...) are presumably the rule names declared in that
// grammar -- the grammar file itself is not part of this source.
#[derive(Parser)]
#[grammar = "model/literal/lang.pest"]
struct LanguageTagParser;
/// Separator between sub-tags; used both when formatting tags and when
/// splitting extension / private-use text into sub-tags.
const LANG_SEP: &str = "-";
impl Display for LanguageTag {
    /// Writes the textual form of the tag.
    ///
    /// * `Tag` delegates to the inner [`Tag`]'s `Display`.
    /// * `PrivateUse` writes `x-` followed by the sub-tags joined with the separator.
    /// * `Grandfathered` writes the stored text unchanged.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Tag(tag) => write!(f, "{}", tag),
            Self::PrivateUse(sub_tags) => {
                f.write_str("x-")?;
                f.write_str(&sub_tags.join(LANG_SEP))
            }
            Self::Grandfathered(text) => f.write_str(text),
        }
    }
}
impl FromStr for LanguageTag {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut parsed = LanguageTagParser::parse(Rule::language_tag, s).map_err(|e| {
Error::with_chain(
e,
ErrorKind::InvalidFromStr(
s.to_string(),
std::any::type_name::<LanguageTag>().to_string(),
),
)
})?;
let top_node = parsed.next().unwrap();
language_tag(top_node)
}
}
impl PartialEq for LanguageTag {
    /// Two tags are equal when they are the same variant and their contents
    /// match ignoring ASCII case. Different variants are never equal.
    fn eq(&self, other: &Self) -> bool {
        match self {
            Self::Tag(lhs) => matches!(other, Self::Tag(rhs) if lhs == rhs),
            Self::PrivateUse(lhs) => {
                matches!(other, Self::PrivateUse(rhs) if vec_eq_ignore_case(lhs, rhs))
            }
            Self::Grandfathered(lhs) => {
                matches!(other, Self::Grandfathered(rhs) if lhs.eq_ignore_ascii_case(rhs))
            }
        }
    }
}
impl Hash for LanguageTag {
    /// Feeds a case-normalized form of the tag into `state`.
    ///
    /// Equality for every variant ignores ASCII case, so every variant must
    /// hash the lowercased form of its contents; otherwise two equal tags could
    /// produce different hashes and be lost in a `HashMap`/`HashSet`.
    fn hash<H: Hasher>(&self, state: &mut H) {
        match self {
            LanguageTag::Tag(tag) => tag.hash(state),
            LanguageTag::PrivateUse(sub_tags) => sub_tags
                .iter()
                .map(|s| s.to_ascii_lowercase())
                .collect::<Vec<String>>()
                .hash(state),
            // Grandfathered tags compare with `eq_ignore_ascii_case`, so the
            // hash has to be taken over the lowercased text as well.
            LanguageTag::Grandfathered(text) => text.to_ascii_lowercase().hash(state),
        }
    }
}
impl LanguageTag {
pub fn is_tag(&self) -> bool {
matches!(self, Self::Tag(_))
}
pub fn is_grandfathered(&self) -> bool {
matches!(self, Self::Grandfathered(_))
}
pub fn is_private_use(&self) -> bool {
matches!(self, Self::PrivateUse(_))
}
}
impl Display for Tag {
    /// Writes the tag as `language[-script][-region](-variant)*(-extension)*[-x(-private)+]`.
    ///
    /// Every component is written with the casing it is stored in; use
    /// `to_canonical_format` first if normalized casing is wanted.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.language)?;
        // The script sits between language and region; leaving it out would
        // silently turn e.g. `zh-Hant-TW` into `zh-TW`.
        if let Some(script) = &self.script {
            write!(f, "{}{}", LANG_SEP, script)?;
        }
        if let Some(region) = &self.region {
            write!(f, "{}{}", LANG_SEP, region)?;
        }
        for variant in &self.variants {
            write!(f, "{}{}", LANG_SEP, variant)?;
        }
        for extension in &self.extensions {
            write!(f, "{}{}", LANG_SEP, extension)?;
        }
        // Private-use sub-tags are stored without their `x` singleton (it is
        // stripped while parsing), so it has to be written back here for the
        // output to parse as the same tag again.
        if !self.private_use.is_empty() {
            write!(f, "{}x", LANG_SEP)?;
            for sub_tag in &self.private_use {
                write!(f, "{}{}", LANG_SEP, sub_tag)?;
            }
        }
        Ok(())
    }
}
impl PartialEq for Tag {
    /// Compares two tags component by component, ignoring ASCII case.
    ///
    /// Variants, extensions and private-use sub-tags must match in the same
    /// order; extensions are compared with `Extension`'s own equality.
    fn eq(&self, other: &Self) -> bool {
        if !self.language.eq_ignore_ascii_case(&other.language) {
            return false;
        }
        if !option_eq_ignore_case(&self.script, &other.script) {
            return false;
        }
        if !option_eq_ignore_case(&self.region, &other.region) {
            return false;
        }
        if !vec_eq_ignore_case(&self.variants, &other.variants) {
            return false;
        }
        if self.extensions != other.extensions {
            return false;
        }
        vec_eq_ignore_case(&self.private_use, &other.private_use)
    }
}
impl Hash for Tag {
    /// Hashes every component that takes part in equality, in lowercased form.
    ///
    /// Equality ignores ASCII case, so each string is lowercased before being
    /// hashed to keep `a == b` implying `hash(a) == hash(b)`. The region is
    /// included so that tags differing only by region (`en-GB` / `en-US`) do
    /// not all land in the same bucket.
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.language.to_ascii_lowercase().hash(state);
        self.script
            .as_ref()
            .map(|s| s.to_ascii_lowercase())
            .hash(state);
        self.region
            .as_ref()
            .map(|s| s.to_ascii_lowercase())
            .hash(state);
        self.variants
            .iter()
            .map(|s| s.to_ascii_lowercase())
            .collect::<Vec<String>>()
            .hash(state);
        // Extensions bring their own case-normalizing `Hash` implementation.
        self.extensions.iter().for_each(|e| e.hash(state));
        self.private_use
            .iter()
            .map(|s| s.to_ascii_lowercase())
            .collect::<Vec<String>>()
            .hash(state);
    }
}
impl From<Tag> for LanguageTag {
fn from(tag: Tag) -> Self {
LanguageTag::Tag(tag)
}
}
impl Tag {
    /// The primary language sub-tag, with the casing it is stored in.
    pub fn language(&self) -> &String {
        &self.language
    }

    /// Returns `true` when a script sub-tag is present.
    pub fn has_script(&self) -> bool {
        self.script().is_some()
    }

    /// The script sub-tag, if any.
    pub fn script(&self) -> Option<&String> {
        Option::as_ref(&self.script)
    }

    /// Returns `true` when a region sub-tag is present.
    pub fn has_region(&self) -> bool {
        self.region().is_some()
    }

    /// The region sub-tag, if any.
    pub fn region(&self) -> Option<&String> {
        Option::as_ref(&self.region)
    }

    /// Returns `true` when at least one variant sub-tag is present.
    pub fn has_variants(&self) -> bool {
        !self.variants.is_empty()
    }

    /// Iterates over the variant sub-tags in stored order.
    pub fn variants(&self) -> impl Iterator<Item = &String> {
        self.variants.iter()
    }

    /// Returns `true` when at least one extension is present.
    pub fn has_extensions(&self) -> bool {
        !self.extensions.is_empty()
    }

    /// Iterates over the extensions in stored order.
    pub fn extensions(&self) -> impl Iterator<Item = &Extension> {
        self.extensions.iter()
    }

    /// Returns `true` when at least one private-use sub-tag is present.
    pub fn has_private_use(&self) -> bool {
        !self.private_use.is_empty()
    }

    /// Iterates over the private-use sub-tags in stored order.
    pub fn private_use(&self) -> impl Iterator<Item = &String> {
        self.private_use.iter()
    }

    /// Returns a copy of this tag with conventional casing and ordering:
    ///
    /// * language in lowercase,
    /// * script in title case (first character upper, the rest lower),
    /// * region in uppercase,
    /// * extensions sorted by singleton (stable, so ties keep their order).
    ///
    /// Variants and private-use sub-tags are copied unchanged.
    ///
    /// # Panics
    ///
    /// Panics if a script is present but empty.
    pub fn to_canonical_format(&self) -> Self {
        let script = self.script.as_ref().map(|raw| {
            let mut letters = raw.chars();
            let head = letters.next().unwrap();
            let mut title_cased = String::with_capacity(raw.len());
            title_cased.extend(head.to_uppercase());
            title_cased.extend(letters.flat_map(char::to_lowercase));
            title_cased
        });

        let mut extensions = self.extensions.clone();
        extensions.sort_by_key(|ext| ext.singleton);

        Self {
            language: self.language.to_lowercase(),
            script,
            region: self.region.as_ref().map(|r| r.to_uppercase()),
            variants: self.variants.clone(),
            extensions,
            private_use: self.private_use.clone(),
        }
    }
}
impl Display for Extension {
    /// Writes the singleton followed by each sub-tag, every sub-tag preceded
    /// by the separator (for example `u-co-phonebk`).
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.singleton)?;
        self.sub_tags
            .iter()
            .try_for_each(|sub_tag| write!(f, "{}{}", LANG_SEP, sub_tag))
    }
}
impl PartialEq for Extension {
    /// Two extensions are equal when their singletons and all of their
    /// sub-tags match, ignoring ASCII case.
    ///
    /// The singleton is compared case-insensitively like every other part of
    /// a tag, so `U-co-phonebk` and `u-co-phonebk` are the same extension.
    fn eq(&self, other: &Self) -> bool {
        self.singleton.eq_ignore_ascii_case(&other.singleton)
            && vec_eq_ignore_case(&self.sub_tags, &other.sub_tags)
    }
}

impl Hash for Extension {
    /// Hashes the lowercased singleton and lowercased sub-tags.
    ///
    /// This mirrors the case-insensitive `PartialEq` implementation so that
    /// equal extensions always produce equal hashes.
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.singleton.to_ascii_lowercase().hash(state);
        self.sub_tags
            .iter()
            .map(|s| s.to_ascii_lowercase())
            .collect::<Vec<String>>()
            .hash(state);
    }
}
impl Extension {
pub fn singleton(&self) -> &char {
&self.singleton
}
pub fn sub_tags(&self) -> impl Iterator<Item = &String> {
self.sub_tags.iter()
}
}
/// Converts a `language_tag` parse node into a [`LanguageTag`].
///
/// The node's first child decides the variant: `lang_tag`, `private_use` or
/// `grandfathered`. Any other child rule is treated as impossible.
///
/// # Errors
///
/// Returns `InvalidFromStr` when `input_pair` is not a `language_tag` node, and
/// passes on any error raised while converting the child node.
fn language_tag(input_pair: Pair<'_, Rule>) -> Result<LanguageTag, Error> {
    if input_pair.as_rule() != Rule::language_tag {
        return Err(ErrorKind::InvalidFromStr(
            input_pair.as_str().to_string(),
            std::any::type_name::<LanguageTag>().to_string(),
        )
        .into());
    }

    let inner_pair = input_pair.into_inner().next().unwrap();
    let tag = match inner_pair.as_rule() {
        Rule::lang_tag => LanguageTag::Tag(lang_tag(inner_pair)?),
        Rule::private_use => LanguageTag::PrivateUse(private_use(inner_pair)?),
        Rule::grandfathered => LanguageTag::Grandfathered(inner_pair.as_str().to_string()),
        _ => unreachable!(),
    };
    Ok(tag)
}
/// Converts a `lang_tag` parse node into a [`Tag`].
///
/// The first child is taken as the language; the remaining children are
/// sorted into script, region, variants, extensions and private-use sub-tags
/// according to their rule. Separator nodes are skipped.
///
/// # Errors
///
/// Returns `InvalidFromStr` when `input_pair` is not a `lang_tag` node or when
/// the same variant occurs more than once. Variants are compared ignoring
/// ASCII case, matching how tags are compared everywhere else in this file.
fn lang_tag(input_pair: Pair<'_, Rule>) -> Result<Tag, Error> {
    if input_pair.as_rule() != Rule::lang_tag {
        return Err(ErrorKind::InvalidFromStr(
            input_pair.as_str().to_string(),
            std::any::type_name::<LanguageTag>().to_string(),
        )
        .into());
    }

    let mut inner_pairs = input_pair.into_inner();
    let language = inner_pairs.next().unwrap().as_str().to_string();
    let mut script: Option<String> = None;
    let mut region: Option<String> = None;
    let mut variants: Vec<String> = Default::default();
    let mut extensions: Vec<Extension> = Default::default();
    let mut v_private_use: Vec<String> = Default::default();

    for inner_pair in inner_pairs {
        match inner_pair.as_rule() {
            Rule::script => {
                script = Some(inner_pair.as_str().to_string());
            }
            Rule::region => {
                region = Some(inner_pair.as_str().to_string());
            }
            Rule::variant => {
                variants.push(inner_pair.as_str().to_string());
            }
            Rule::extension => {
                extensions.push(extension(inner_pair)?);
            }
            Rule::private_use => {
                v_private_use = private_use(inner_pair)?;
            }
            Rule::SEP => {}
            _ => {
                unreachable!()
            }
        }
    }

    // Tags are case-insensitive, so `rozaj-ROZAJ` is just as much a duplicate
    // as `rozaj-rozaj`; normalize before checking for repeats.
    let mut seen: HashSet<String> = HashSet::with_capacity(variants.len());
    let all_unique = variants
        .iter()
        .all(|variant| seen.insert(variant.to_ascii_lowercase()));
    if !all_unique {
        return Err(ErrorKind::InvalidFromStr(
            format!("duplicate variants: {:?}", variants),
            std::any::type_name::<LanguageTag>().to_string(),
        )
        .into());
    }

    Ok(Tag {
        language,
        script,
        region,
        variants,
        extensions,
        private_use: v_private_use,
    })
}
/// Builds an [`Extension`] from an `extension` parse node.
///
/// The first character of the matched text is the singleton; the text from
/// index 2 onward (past the singleton and its separator) is split on the
/// separator to form the sub-tags. Always returns `Ok`; the `Result` keeps the
/// signature in line with the other node converters.
#[allow(clippy::unnecessary_wraps)]
fn extension(input_pair: Pair<'_, Rule>) -> Result<Extension, Error> {
    let text = input_pair.as_str();
    let singleton: char = text[0..1].chars().next().unwrap();
    let sub_tags: Vec<String> = text[2..].split(LANG_SEP).map(String::from).collect();
    Ok(Extension {
        singleton,
        sub_tags,
    })
}
/// Extracts the private-use sub-tags from a `private_use` parse node.
///
/// The first two characters of the matched text (the singleton and its
/// separator) are skipped and the rest is split on the separator. Always
/// returns `Ok`; the `Result` keeps the signature in line with the other node
/// converters.
#[allow(clippy::unnecessary_wraps)]
fn private_use(input_pair: Pair<'_, Rule>) -> Result<Vec<String>, Error> {
    let text = input_pair.as_str();
    let sub_tags: Vec<String> = text[2..].split(LANG_SEP).map(String::from).collect();
    Ok(sub_tags)
}
/// Compares two optional strings ignoring ASCII case.
///
/// Returns `true` when both are `None`, or both are `Some` and their contents
/// match case-insensitively; a `Some`/`None` mix is never equal.
fn option_eq_ignore_case(lhs: &Option<String>, rhs: &Option<String>) -> bool {
    match (lhs.as_deref(), rhs.as_deref()) {
        (Some(left), Some(right)) => left.eq_ignore_ascii_case(right),
        (left, right) => left.is_none() && right.is_none(),
    }
}
/// Compares two string slices element by element, ignoring ASCII case.
///
/// Slices of different lengths are never equal; two empty slices are equal.
fn vec_eq_ignore_case(lhs: &[String], rhs: &[String]) -> bool {
    lhs.len() == rhs.len()
        && lhs
            .iter()
            .zip(rhs)
            .all(|(left, right)| left.eq_ignore_ascii_case(right))
}