pub struct LanguageIdentifier { /* private fields */ }
Expand description

LanguageIdentifier is a core struct representing a Unicode Language Identifier.

Examples

use unic_langid_impl::LanguageIdentifier;

let li: LanguageIdentifier = "en-US".parse()
    .expect("Failed to parse.");

assert_eq!(li.language(), "en");
assert_eq!(li.script(), None);
assert_eq!(li.region(), Some("US"));
assert_eq!(li.variants().len(), 0);

Parsing

Unicode recognizes three levels of standard conformance for any language identifier:

  • well-formed - syntactically correct
  • valid - well-formed and only uses registered language subtags, extensions, keywords, types…
  • canonical - valid and no deprecated codes or structure.

At the moment, parsing normalizes a well-formed language identifier, converting _ separators to - and adjusting casing to conform to the Unicode standard.

Any malformed subtag will cause parsing to fail with an error. Beyond this well-formedness check, no subtag validation is performed.

Examples:

use unic_langid_impl::LanguageIdentifier;

let li: LanguageIdentifier = "eN_latn_Us-Valencia".parse()
    .expect("Failed to parse.");

assert_eq!(li.language(), "en");
assert_eq!(li.script(), Some("Latn"));
assert_eq!(li.region(), Some("US"));
assert_eq!(li.variants().collect::<Vec<_>>(), &["valencia"]);

Implementations§

§

impl LanguageIdentifier

pub fn from_bytes( v: &[u8] ) -> Result<LanguageIdentifier, LanguageIdentifierError>

A constructor which takes a UTF-8 byte slice, parses it and produces a well-formed LanguageIdentifier.

Examples
use unic_langid_impl::LanguageIdentifier;

let li = LanguageIdentifier::from_bytes("en-US".as_bytes())
    .expect("Parsing failed.");

assert_eq!(li.to_string(), "en-US");

pub fn from_parts<S>( language: Option<S>, script: Option<S>, region: Option<S>, variants: &[S] ) -> Result<LanguageIdentifier, LanguageIdentifierError>
where S: AsRef<[u8]>,

A constructor which takes optional subtags as AsRef<[u8]>, parses them and produces a well-formed LanguageIdentifier.

Examples
use unic_langid_impl::LanguageIdentifier;

let li = LanguageIdentifier::from_parts(Some("fr"), None, Some("CA"), &[])
    .expect("Parsing failed.");

assert_eq!(li.to_string(), "fr-CA");

pub fn into_raw_parts( self ) -> (Option<u64>, Option<u32>, Option<u32>, Option<Box<[u64]>>)

Consumes LanguageIdentifier and produces raw internal representations of all subtags in the form of u64/u32.

Primarily used for storing internal representation and restoring via from_raw_parts_unchecked.

Examples
use unic_langid_impl::LanguageIdentifier;
use tinystr::{TinyStr8, TinyStr4};

let li: LanguageIdentifier = "en-US".parse()
    .expect("Parsing failed.");

let (lang, script, region, variants) = li.into_raw_parts();

let li2 = LanguageIdentifier::from_raw_parts_unchecked(
    lang.map(|l| unsafe { TinyStr8::new_unchecked(l) }),
    script.map(|s| unsafe { TinyStr4::new_unchecked(s) }),
    region.map(|r| unsafe { TinyStr4::new_unchecked(r) }),
    variants.map(|v| v.into_iter().map(|v| unsafe { TinyStr8::new_unchecked(*v) }).collect()),
);

assert_eq!(li2.to_string(), "en-US");

pub const fn from_raw_parts_unchecked( language: Option<TinyStr8>, script: Option<TinyStr4>, region: Option<TinyStr4>, variants: Option<Box<[TinyStr8]>> ) -> LanguageIdentifier

Consumes the raw representation of subtags, generating a new LanguageIdentifier without any checks.

Primarily used for restoring internal representation.

Examples
use unic_langid_impl::LanguageIdentifier;
use tinystr::{TinyStr8, TinyStr4};

let li: LanguageIdentifier = "en-US".parse()
    .expect("Parsing failed.");

let (lang, script, region, variants) = li.into_raw_parts();

let li2 =  LanguageIdentifier::from_raw_parts_unchecked(
    lang.map(|l| unsafe { TinyStr8::new_unchecked(l) }),
    script.map(|s| unsafe { TinyStr4::new_unchecked(s) }),
    region.map(|r| unsafe { TinyStr4::new_unchecked(r) }),
    variants.map(|v| v.into_iter().map(|v| unsafe { TinyStr8::new_unchecked(*v) }).collect()),
);

assert_eq!(li2.to_string(), "en-US");

pub fn matches<O>( &self, other: &O, self_as_range: bool, other_as_range: bool ) -> bool

Compares a LanguageIdentifier to another AsRef<LanguageIdentifier>, allowing for either side to use the missing fields as wildcards.

This allows for matching between en (treated as en-*-*-*) and en-US.

Examples
use unic_langid_impl::LanguageIdentifier;

let li1: LanguageIdentifier = "en".parse()
    .expect("Parsing failed.");

let li2: LanguageIdentifier = "en-US".parse()
    .expect("Parsing failed.");

assert_ne!(li1, li2); // "en" != "en-US"
assert_ne!(li1.to_string(), li2.to_string()); // "en" != "en-US"

assert_eq!(li1.matches(&li2, false, false), false); // "en" != "en-US"
assert_eq!(li1.matches(&li2, true, false), true); // "en-*-*-*" == "en-US"
assert_eq!(li1.matches(&li2, false, true), false); // "en" != "en-*-US-*"
assert_eq!(li1.matches(&li2, true, true), true); // "en-*-*-*" == "en-*-US-*"

pub fn language(&self) -> &str

Returns the language subtag of the LanguageIdentifier.

If the language is empty, "und" is returned.

Examples
use unic_langid_impl::LanguageIdentifier;

let li1: LanguageIdentifier = "de-AT".parse()
    .expect("Parsing failed.");

assert_eq!(li1.language(), "de");

let li2: LanguageIdentifier = "und-AT".parse()
    .expect("Parsing failed.");

assert_eq!(li2.language(), "und");

pub fn set_language<S>( &mut self, language: S ) -> Result<(), LanguageIdentifierError>
where S: AsRef<[u8]>,

Sets the language subtag of the LanguageIdentifier.

Examples
use unic_langid_impl::LanguageIdentifier;

let mut li: LanguageIdentifier = "de-Latn-AT".parse()
    .expect("Parsing failed.");

li.set_language("fr")
    .expect("Parsing failed.");

assert_eq!(li.to_string(), "fr-Latn-AT");

pub fn clear_language(&mut self)

Clears the language subtag of the LanguageIdentifier.

An empty language subtag is serialized to und.

Examples
use unic_langid_impl::LanguageIdentifier;

let mut li: LanguageIdentifier = "de-Latn-AT".parse()
    .expect("Parsing failed.");

li.clear_language();

assert_eq!(li.to_string(), "und-Latn-AT");

pub fn script(&self) -> Option<&str>

Returns the script subtag of the LanguageIdentifier, if set.

Examples
use unic_langid_impl::LanguageIdentifier;

let li1: LanguageIdentifier = "de-Latn-AT".parse()
    .expect("Parsing failed.");

assert_eq!(li1.script(), Some("Latn"));

let li2: LanguageIdentifier = "de-AT".parse()
    .expect("Parsing failed.");

assert_eq!(li2.script(), None);

pub fn set_script<S>( &mut self, script: S ) -> Result<(), LanguageIdentifierError>
where S: AsRef<[u8]>,

Sets the script subtag of the LanguageIdentifier.

Examples
use unic_langid_impl::LanguageIdentifier;

let mut li: LanguageIdentifier = "sr-Latn".parse()
    .expect("Parsing failed.");

li.set_script("Cyrl")
    .expect("Parsing failed.");

assert_eq!(li.to_string(), "sr-Cyrl");

pub fn clear_script(&mut self)

Clears the script subtag of the LanguageIdentifier.

Examples
use unic_langid_impl::LanguageIdentifier;

let mut li: LanguageIdentifier = "sr-Latn".parse()
    .expect("Parsing failed.");

li.clear_script();

assert_eq!(li.to_string(), "sr");

pub fn region(&self) -> Option<&str>

Returns the region subtag of the LanguageIdentifier, if set.

Examples
use unic_langid_impl::LanguageIdentifier;

let li1: LanguageIdentifier = "de-Latn-AT".parse()
    .expect("Parsing failed.");

assert_eq!(li1.region(), Some("AT"));

let li2: LanguageIdentifier = "de".parse()
    .expect("Parsing failed.");

assert_eq!(li2.region(), None);

pub fn set_region<S>( &mut self, region: S ) -> Result<(), LanguageIdentifierError>
where S: AsRef<[u8]>,

Sets the region subtag of the LanguageIdentifier.

Examples
use unic_langid_impl::LanguageIdentifier;

let mut li: LanguageIdentifier = "fr-FR".parse()
    .expect("Parsing failed.");

li.set_region("CA")
    .expect("Parsing failed.");

assert_eq!(li.to_string(), "fr-CA");

pub fn clear_region(&mut self)

Clears the region subtag of the LanguageIdentifier.

Examples
use unic_langid_impl::LanguageIdentifier;

let mut li: LanguageIdentifier = "fr-FR".parse()
    .expect("Parsing failed.");

li.clear_region();

assert_eq!(li.to_string(), "fr");

pub fn variants(&self) -> impl ExactSizeIterator

Returns an iterator over the variant subtags of the LanguageIdentifier.

Examples
use unic_langid_impl::LanguageIdentifier;

let li1: LanguageIdentifier = "ca-ES-valencia".parse()
    .expect("Parsing failed.");

assert_eq!(li1.variants().collect::<Vec<_>>(), &["valencia"]);

let li2: LanguageIdentifier = "de".parse()
    .expect("Parsing failed.");

assert_eq!(li2.variants().len(), 0);

pub fn set_variants<S>( &mut self, variants: impl IntoIterator<Item = S> ) -> Result<(), LanguageIdentifierError>
where S: AsRef<[u8]>,

Sets variant subtags of the LanguageIdentifier.

Examples
use unic_langid_impl::LanguageIdentifier;

let mut li: LanguageIdentifier = "ca-ES".parse()
    .expect("Parsing failed.");

li.set_variants(&["valencia"])
    .expect("Parsing failed.");

assert_eq!(li.to_string(), "ca-ES-valencia");

pub fn has_variant<S>( &self, variant: S ) -> Result<bool, LanguageIdentifierError>
where S: AsRef<[u8]>,

Tests if a variant subtag is present in the LanguageIdentifier.

Examples
use unic_langid_impl::LanguageIdentifier;

let mut li: LanguageIdentifier = "ca-ES-macos".parse()
    .expect("Parsing failed.");

assert_eq!(li.has_variant("valencia"), Ok(false));
assert_eq!(li.has_variant("macos"), Ok(true));

pub fn clear_variants(&mut self)

Clears variant subtags of the LanguageIdentifier.

Examples
use unic_langid_impl::LanguageIdentifier;

let mut li: LanguageIdentifier = "ca-ES-valencia".parse()
    .expect("Parsing failed.");

li.clear_variants();

assert_eq!(li.to_string(), "ca-ES");

pub fn character_direction(&self) -> CharacterDirection

Returns the character direction of the LanguageIdentifier.

Examples
use unic_langid_impl::{LanguageIdentifier, CharacterDirection};

let li1: LanguageIdentifier = "es-AR".parse()
    .expect("Parsing failed.");
let li2: LanguageIdentifier = "fa".parse()
    .expect("Parsing failed.");

assert_eq!(li1.character_direction(), CharacterDirection::LTR);
assert_eq!(li2.character_direction(), CharacterDirection::RTL);

Trait Implementations§

§

impl AsRef<LanguageIdentifier> for LanguageIdentifier

§

fn as_ref(&self) -> &LanguageIdentifier

Converts this type into a shared reference of the (usually inferred) input type.
§

impl Clone for LanguageIdentifier

§

fn clone(&self) -> LanguageIdentifier

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
§

impl Debug for LanguageIdentifier

§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more
§

impl Default for LanguageIdentifier

§

fn default() -> LanguageIdentifier

Returns the “default value” for a type. Read more
§

impl Display for LanguageIdentifier

§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more
§

impl FromStr for LanguageIdentifier

§

type Err = LanguageIdentifierError

The associated error which can be returned from parsing.
§

fn from_str( source: &str ) -> Result<LanguageIdentifier, <LanguageIdentifier as FromStr>::Err>

Parses a string s to return a value of this type. Read more
§

impl Hash for LanguageIdentifier

§

fn hash<__H>(&self, state: &mut __H)
where __H: Hasher,

Feeds this value into the given Hasher. Read more
1.3.0 · source§

fn hash_slice<H>(data: &[Self], state: &mut H)
where H: Hasher, Self: Sized,

Feeds a slice of this type into the given Hasher. Read more
§

impl Ord for LanguageIdentifier

§

fn cmp(&self, other: &LanguageIdentifier) -> Ordering

This method returns an Ordering between self and other. Read more
1.21.0 · source§

fn max(self, other: Self) -> Self
where Self: Sized,

Compares and returns the maximum of two values. Read more
1.21.0 · source§

fn min(self, other: Self) -> Self
where Self: Sized,

Compares and returns the minimum of two values. Read more
1.50.0 · source§

fn clamp(self, min: Self, max: Self) -> Self
where Self: Sized + PartialOrd,

Restrict a value to a certain interval. Read more
§

impl PartialEq for LanguageIdentifier

§

fn eq(&self, other: &LanguageIdentifier) -> bool

This method tests for self and other values to be equal, and is used by ==.
1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

This method tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
§

impl PartialOrd for LanguageIdentifier

§

fn partial_cmp(&self, other: &LanguageIdentifier) -> Option<Ordering>

This method returns an ordering between self and other values if one exists. Read more
1.0.0 · source§

fn lt(&self, other: &Rhs) -> bool

This method tests less than (for self and other) and is used by the < operator. Read more
1.0.0 · source§

fn le(&self, other: &Rhs) -> bool

This method tests less than or equal to (for self and other) and is used by the <= operator. Read more
1.0.0 · source§

fn gt(&self, other: &Rhs) -> bool

This method tests greater than (for self and other) and is used by the > operator. Read more
1.0.0 · source§

fn ge(&self, other: &Rhs) -> bool

This method tests greater than or equal to (for self and other) and is used by the >= operator. Read more
§

impl Eq for LanguageIdentifier

§

impl StructuralEq for LanguageIdentifier

§

impl StructuralPartialEq for LanguageIdentifier

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> AnyEq for T
where T: Any + PartialEq,

source§

fn equals(&self, other: &(dyn Any + 'static)) -> bool

source§

fn as_any(&self) -> &(dyn Any + 'static)

source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for T
where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T> ToString for T
where T: Display + ?Sized,

source§

default fn to_string(&self) -> String

Converts the given value to a String. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.