mod errors;
mod layout_table;
#[cfg(feature = "likelysubtags")]
pub mod likelysubtags;
#[doc(hidden)]
pub mod parser;
#[cfg(feature = "serde")]
mod serde;
pub mod subtags;
pub use crate::errors::LanguageIdentifierError;
use std::fmt::Write;
use std::iter::Peekable;
use std::str::FromStr;
/// Direction in which the characters of a language or script are laid out,
/// as reported by `LanguageIdentifier::character_direction`.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum CharacterDirection {
/// Right-to-left.
RTL,
/// Left-to-right; also the fallback when no layout table entry matches.
LTR,
/// Top-to-bottom.
TTB,
}
/// Owned components of a `LanguageIdentifier`, in the order
/// `(language, script, region, variants)`; returned by
/// `LanguageIdentifier::into_parts`.
type PartsTuple = (
subtags::Language,
Option<subtags::Script>,
Option<subtags::Region>,
Vec<subtags::Variant>,
);
/// A language identifier made of a language subtag plus optional script,
/// region and variant subtags.
///
/// Field order is significant: the derived `PartialOrd`/`Ord` compare
/// `language`, then `script`, then `region`, then `variants`.
#[derive(Default, Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
pub struct LanguageIdentifier {
/// Language subtag.
pub language: subtags::Language,
/// Optional script subtag.
pub script: Option<subtags::Script>,
/// Optional region subtag.
pub region: Option<subtags::Region>,
// Private so that callers go through `from_parts` / `set_variants`, which
// store the list sorted and deduplicated and use `None` for "no variants".
// `from_raw_parts_unchecked` stores whatever it is given.
variants: Option<Box<[subtags::Variant]>>,
}
impl LanguageIdentifier {
    /// Parses a language identifier from raw bytes.
    ///
    /// # Errors
    ///
    /// Returns a [`LanguageIdentifierError`] converted from whatever error
    /// `parser::parse_language_identifier` reports for the input.
    pub fn from_bytes(v: &[u8]) -> Result<Self, LanguageIdentifierError> {
        Ok(parser::parse_language_identifier(v)?)
    }

    /// Builds an identifier from already-parsed subtags.
    ///
    /// The variants are copied, sorted and deduplicated; an empty slice is
    /// stored as "no variants".
    pub fn from_parts(
        language: subtags::Language,
        script: Option<subtags::Script>,
        region: Option<subtags::Region>,
        variants: &[subtags::Variant],
    ) -> Self {
        Self {
            language,
            script,
            region,
            variants: Self::normalize_variants(variants),
        }
    }

    /// Builds an identifier from its raw fields without touching them.
    ///
    /// Unlike [`from_parts`](Self::from_parts), `variants` is stored exactly
    /// as given: it is neither sorted nor deduplicated, and `Some` of an
    /// empty slice is not collapsed to `None`. Usable in `const` contexts.
    pub const fn from_raw_parts_unchecked(
        language: subtags::Language,
        script: Option<subtags::Script>,
        region: Option<subtags::Region>,
        variants: Option<Box<[subtags::Variant]>>,
    ) -> Self {
        Self {
            language,
            script,
            region,
            variants,
        }
    }

    /// Parses an identifier from an iterator of byte-slice subtags by
    /// delegating to `parser::parse_language_identifier_from_iter`.
    ///
    /// `allow_extension` is forwarded to the parser unchanged.
    ///
    /// # Errors
    ///
    /// Returns a [`LanguageIdentifierError`] converted from the parser's
    /// error.
    #[doc(hidden)]
    pub fn try_from_iter<'a>(
        iter: &mut Peekable<impl Iterator<Item = &'a [u8]>>,
        allow_extension: bool,
    ) -> Result<LanguageIdentifier, LanguageIdentifierError> {
        Ok(parser::parse_language_identifier_from_iter(
            iter,
            allow_extension,
        )?)
    }

    /// Consumes the identifier and returns
    /// `(language, script, region, variants)`.
    ///
    /// A missing variant list becomes an empty `Vec`.
    pub fn into_parts(self) -> PartsTuple {
        (
            self.language,
            self.script,
            self.region,
            // `self` is owned, so hand the boxed slice's buffer to the `Vec`
            // instead of copying every element into a fresh allocation.
            self.variants
                .map_or_else(Vec::new, |boxed| boxed.into_vec()),
        )
    }

    /// Compares `self` with `other`, optionally treating either side as a
    /// range.
    ///
    /// With `self_as_range` set, an unset script or region and an empty
    /// variant list on `self` match anything on `other`; `other_as_range`
    /// does the same for `other`. Otherwise the fields must be equal. The
    /// language comparison is delegated to `subtags::Language::matches`
    /// with the same two flags.
    pub fn matches<O: AsRef<Self>>(
        &self,
        other: &O,
        self_as_range: bool,
        other_as_range: bool,
    ) -> bool {
        let other = other.as_ref();
        self.language
            .matches(other.language, self_as_range, other_as_range)
            && subtag_matches(&self.script, &other.script, self_as_range, other_as_range)
            && subtag_matches(&self.region, &other.region, self_as_range, other_as_range)
            && subtags_match(
                &self.variants,
                &other.variants,
                self_as_range,
                other_as_range,
            )
    }

    /// Iterates over the stored variants; yields nothing when there are none.
    pub fn variants(&self) -> impl ExactSizeIterator<Item = &subtags::Variant> {
        self.variants.as_deref().unwrap_or(&[]).iter()
    }

    /// Replaces the variants with a sorted, deduplicated copy of `variants`.
    ///
    /// An empty slice clears the variants.
    pub fn set_variants(&mut self, variants: &[subtags::Variant]) {
        self.variants = Self::normalize_variants(variants);
    }

    /// Returns `true` if `variant` is one of the stored variants.
    pub fn has_variant(&self, variant: subtags::Variant) -> bool {
        // Linear search on purpose: `from_raw_parts_unchecked` may have
        // stored an unsorted list, so a binary search would not be safe.
        self.variants
            .as_deref()
            .is_some_and(|stored| stored.contains(&variant))
    }

    /// Removes all variants.
    pub fn clear_variants(&mut self) {
        self.variants = None;
    }

    /// Replaces language, script and region with the result of
    /// `likelysubtags::maximize`.
    ///
    /// Returns `true` if a result was produced and applied, and `false`
    /// (leaving `self` untouched) otherwise. Variants are never modified.
    #[cfg(feature = "likelysubtags")]
    pub fn maximize(&mut self) -> bool {
        match likelysubtags::maximize(self.language, self.script, self.region) {
            Some((language, script, region)) => {
                self.language = language;
                self.script = script;
                self.region = region;
                true
            }
            None => false,
        }
    }

    /// Replaces language, script and region with the result of
    /// `likelysubtags::minimize`.
    ///
    /// Returns `true` if a result was produced and applied, and `false`
    /// (leaving `self` untouched) otherwise. Variants are never modified.
    #[cfg(feature = "likelysubtags")]
    pub fn minimize(&mut self) -> bool {
        match likelysubtags::minimize(self.language, self.script, self.region) {
            Some((language, script, region)) => {
                self.language = language;
                self.script = script;
                self.region = region;
                true
            }
            None => false,
        }
    }

    /// Returns the character direction for this identifier.
    ///
    /// An explicit script listed in one of the script layout tables decides
    /// the result (checked in the order LTR, RTL, TTB). Otherwise, a language
    /// listed in the RTL language table yields `RTL`, unless the
    /// `likelysubtags` feature is enabled and maximizing the language and
    /// region (with no script) gives a script from the LTR table. Everything
    /// else is `LTR`.
    pub fn character_direction(&self) -> CharacterDirection {
        match (self.language.into(), self.script) {
            (_, Some(script))
                if layout_table::SCRIPTS_CHARACTER_DIRECTION_LTR.contains(&script.into()) =>
            {
                CharacterDirection::LTR
            }
            (_, Some(script))
                if layout_table::SCRIPTS_CHARACTER_DIRECTION_RTL.contains(&script.into()) =>
            {
                CharacterDirection::RTL
            }
            (_, Some(script))
                if layout_table::SCRIPTS_CHARACTER_DIRECTION_TTB.contains(&script.into()) =>
            {
                CharacterDirection::TTB
            }
            // Reached when there is no script, or the script is in none of
            // the script tables: fall back to the language.
            (Some(lang), _) if layout_table::LANGS_CHARACTER_DIRECTION_RTL.contains(&lang) => {
                // The likely script for this language/region may still be an
                // LTR one, which overrides the language-level RTL default.
                #[cfg(feature = "likelysubtags")]
                if let Some((_, Some(script), _)) =
                    likelysubtags::maximize(self.language, None, self.region)
                {
                    if layout_table::SCRIPTS_CHARACTER_DIRECTION_LTR.contains(&script.into()) {
                        return CharacterDirection::LTR;
                    }
                }
                CharacterDirection::RTL
            }
            _ => CharacterDirection::LTR,
        }
    }

    /// Copies `variants` into the canonical stored form: sorted and
    /// deduplicated, with an empty input represented as `None`.
    fn normalize_variants(variants: &[subtags::Variant]) -> Option<Box<[subtags::Variant]>> {
        if variants.is_empty() {
            return None;
        }
        let mut sorted = variants.to_vec();
        sorted.sort_unstable();
        sorted.dedup();
        Some(sorted.into_boxed_slice())
    }
}
impl FromStr for LanguageIdentifier {
type Err = LanguageIdentifierError;
fn from_str(source: &str) -> Result<Self, Self::Err> {
Self::from_bytes(source.as_bytes())
}
}
/// Identity conversion, so that APIs generic over
/// `AsRef<LanguageIdentifier>` (such as `LanguageIdentifier::matches`)
/// accept a plain `LanguageIdentifier`.
impl AsRef<LanguageIdentifier> for LanguageIdentifier {
    #[inline(always)]
    fn as_ref(&self) -> &Self {
        self
    }
}
impl std::fmt::Display for LanguageIdentifier {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
self.language.fmt(f)?;
if let Some(ref script) = self.script {
f.write_char('-')?;
script.fmt(f)?;
}
if let Some(ref region) = self.region {
f.write_char('-')?;
region.fmt(f)?;
}
if let Some(variants) = &self.variants {
for variant in variants.iter() {
f.write_char('-')?;
variant.fmt(f)?;
}
}
Ok(())
}
}
/// Compares the identifier's `Display` rendering with a string slice.
///
/// The comparison is an exact string match against the rendered form; the
/// right-hand side is not parsed.
impl PartialEq<&str> for LanguageIdentifier {
    fn eq(&self, other: &&str) -> bool {
        let rendered = self.to_string();
        rendered == *other
    }
}
/// Decides whether two optional subtags match.
///
/// A side that is flagged as a range and has no value matches anything;
/// otherwise the two options must compare equal.
fn subtag_matches<P: PartialEq>(
    subtag1: &Option<P>,
    subtag2: &Option<P>,
    as_range1: bool,
    as_range2: bool,
) -> bool {
    // An unset subtag on a "range" side acts as a wildcard.
    if as_range1 && subtag1.is_none() {
        return true;
    }
    if as_range2 && subtag2.is_none() {
        return true;
    }
    subtag1 == subtag2
}
/// Returns `true` when the optional boxed slice is `None` or holds no
/// elements.
fn is_option_empty<P: PartialEq>(subtag: &Option<Box<[P]>>) -> bool {
    match subtag {
        Some(items) => items.is_empty(),
        None => true,
    }
}
/// Decides whether two optional subtag lists match.
///
/// A side that is flagged as a range and is empty (either `None` or a
/// zero-length slice) matches anything; otherwise the two options must
/// compare equal.
fn subtags_match<P: PartialEq>(
    subtag1: &Option<Box<[P]>>,
    subtag2: &Option<Box<[P]>>,
    as_range1: bool,
    as_range2: bool,
) -> bool {
    // An empty list on a "range" side acts as a wildcard.
    if as_range1 && is_option_empty(subtag1) {
        return true;
    }
    if as_range2 && is_option_empty(subtag2) {
        return true;
    }
    subtag1 == subtag2
}
/// Parses `input` as a language identifier and returns its `Display`
/// rendering.
///
/// # Errors
///
/// Returns the [`LanguageIdentifierError`] produced by
/// `LanguageIdentifier::from_bytes` when the input does not parse.
pub fn canonicalize<S: AsRef<[u8]>>(input: S) -> Result<String, LanguageIdentifierError> {
    LanguageIdentifier::from_bytes(input.as_ref()).map(|parsed| parsed.to_string())
}
/// A subtag made of non-ASCII characters must be rejected by the parser.
#[test]
fn invalid_subtag() {
    let outcome = LanguageIdentifier::from_bytes("en-ÁÁÁÁ".as_bytes());
    assert!(outcome.is_err());
}