burgerlingual 2.2.0

burger utility library for web localisation
Documentation
use crate::accept_language::CanonicalizedWeightedLanguages;
use crate::translations::Translation;
#[cfg(feature = "json")]
use crate::translations::TranslationError;
#[cfg(feature = "json")]
use std::ffi::OsStr;
use core::fmt::{Display, Formatter};
use language_tags::LanguageTag;
use std::cmp::Ordering;
use std::collections::HashMap;
use std::error::Error;
#[cfg(feature = "json")]
use std::fs::read_dir;
#[cfg(feature = "json")]
use std::path::Path;
use std::sync::Arc;
#[cfg(feature = "include_dir")]
use include_dir::Dir;
use crate::translation_view::TranslationView;

/// Errors reported by [`TranslationRegistry`] operations.
#[derive(Debug)]
pub enum RegistryError {
    /// The translation directory itself could not be opened for listing.
    FailedToReadDir(std::io::Error),
    /// An individual entry could not be read while iterating the directory.
    FailedToReadDirEntry(std::io::Error),
    /// A language tag failed canonicalization.
    // NOTE(review): nothing in this file constructs this variant; presumably
    // it is produced elsewhere in the crate or kept for API compatibility.
    FailedToCanonicalizeLanguageTag(language_tags::ValidationError),
    /// The registry holds no translation for its canonical language.
    NoCanonicalTranslation,
    /// Loading or parsing a translation failed.
    #[cfg(feature = "json")]
    TranslationError(TranslationError),
}

impl Display for RegistryError {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        match self {
            RegistryError::FailedToReadDir(error) => write!(f, "failed to read directory: {}", error),
            RegistryError::FailedToReadDirEntry(error) => write!(f, "failed to read directory entry: {}", error),
            RegistryError::FailedToCanonicalizeLanguageTag(error) => write!(f, "failed to canonicalize language tag: {}", error),
            RegistryError::NoCanonicalTranslation => write!(f, "no canonical translation found"),
            #[cfg(feature = "json")]
            RegistryError::TranslationError(error) => write!(f, "translation error: {}", error),
        }
    }
}

// Uses the trait's default methods only: `source()` is not overridden, so a
// wrapped error is reachable through the `Display` text or by matching on the
// variant, not through the error-source chain.
impl Error for RegistryError {}

/// A set of translations keyed by language tag, together with the tag of the
/// canonical language that other translations are measured against.
pub struct TranslationRegistry {
    /// Every registered translation, shared via `Arc` so lookups can hand out
    /// cheap clones.
    translations: HashMap<LanguageTag, Arc<Translation>>,
    /// Tag of the canonical language. A translation for this tag is not
    /// guaranteed to be present in `translations`; methods that need it return
    /// `RegistryError::NoCanonicalTranslation` when it is missing.
    canonical: LanguageTag,
}

/// A translation (field `0`) paired with a numeric score (field `1`).
///
/// `TranslationRegistry::get_optimal_translations` fills the score with the
/// translation's completeness multiplied by the requested language's weight.
#[derive(Debug, PartialEq, Clone)]
pub struct WeightedTranslation(pub Arc<Translation>, pub usize);

impl TranslationRegistry {
    #[must_use]
    pub fn new(canonical: LanguageTag, translations: Vec<Translation>) -> Self {
        Self {
            translations: HashMap::from_iter(
                translations.into_iter()
                    .map(|translation| (translation.language().clone(), Arc::new(translation)))
            ),
            canonical,
        }
    }

    pub fn get_translation_completeness(&self, translation: &Translation) -> Result<usize, RegistryError> {
         self.translations.get(&self.canonical).map_or(Err(RegistryError::NoCanonicalTranslation), |canonical| {
             Ok(translation.completeness(canonical))
         })
    }

    pub fn get_optimal_translations(&'_ self, languages: &CanonicalizedWeightedLanguages) -> Result<Vec<WeightedTranslation>, RegistryError> {
        // FACTORS:
        // - HTTP weight
        // - Completeness
        // I.E We don't want to give a 1% translated language!
        // Therefore, we calculate the "language appropriateness index".
        // We then create a shortlist of those with the highest indexes and search for the first one in the HTTP header.
        let mut translations = languages.iter()
            .filter_map(|language| {
                self.translations.get(&language.0).map(|translation| (language.1, translation))
            })
            .map(|language| {
                self.get_translation_completeness(language.1).map(|completeness| {
                    WeightedTranslation(
                        language.1.clone(),
                        completeness * language.0 as usize
                    )
                })
            })
            .collect::<Result<Vec<WeightedTranslation>, RegistryError>>()?;
        translations.sort_by(|a, b| {
            match a.1.cmp(&b.1) {
                Ordering::Equal => {
                    if let Some(ordering) = languages.iter().find_map(|item| {
                        if item.0.eq(a.0.language()) {
                            Some(Ordering::Greater)
                        } else if item.0.eq(b.0.language()) {
                            Some(Ordering::Less)
                        } else {
                            None
                        }
                    }) {
                        ordering
                    } else {
                        assert!(false);
                        Ordering::Equal
                    }
                },
                Ordering::Less => Ordering::Less,
                Ordering::Greater => Ordering::Greater,
            }
        });
        Ok(translations)
    }

    pub fn get_translation_view(&self, languages: &CanonicalizedWeightedLanguages) -> Result<TranslationView, RegistryError> {
        let translations = self.get_optimal_translations(languages)?.into_iter()
            .map(|translation| translation.0)
            .collect();
        self.translations.get(&self.canonical).map_or(Err(RegistryError::NoCanonicalTranslation), |canonical| {
            Ok(TranslationView::new(canonical.clone(), translations))
        })
    }

    #[cfg(feature = "json")]
    pub fn include_json_directory<S: AsRef<Path>>(&mut self, directory: S) -> Result<(), RegistryError> {
        let files = read_dir(directory.as_ref()).map_err(RegistryError::FailedToReadDir)?;
        for file in files {
            let file = file.map_err(RegistryError::FailedToReadDirEntry)?;
            let path = file.path();
            if path.is_file() && path.extension() == Some(OsStr::new("json")) {
                let translation = Translation::from_json_file(path).map_err(RegistryError::TranslationError)?;
                self.translations.insert(translation.language().clone(), Arc::new(translation));
            }
        }
        Ok(())
    }

    #[cfg(feature = "include_dir")]
    pub fn include_directory(&mut self, directory: Dir) -> Result<(), RegistryError> {
        for file in directory.files() {
            let path = file.path();
            if
                path.extension() == Some(OsStr::new("json")) &&
                let Some(stem) = path.file_stem() &&
                let Some(stem) = stem.to_str() &&
                let Some(contents) = file.contents_utf8()
            {
                let translation = Translation::from_json(
                    Translation::parse_language_tag(stem).map_err(RegistryError::TranslationError)?,
                    contents
                ).map_err(RegistryError::TranslationError)?;
                self.translations.insert(translation.language().clone(), Arc::new(translation));
            }
        }
        Ok(())
    }

    #[cfg(feature = "json")]
    pub fn from_json_directory<S: AsRef<Path>>(canonical: LanguageTag, directory: S) -> Result<TranslationRegistry, RegistryError> {
        let mut registry = Self {
            translations: HashMap::with_capacity(
                read_dir(directory.as_ref()).map_err(RegistryError::FailedToReadDir)?.count()
            ),
            canonical,
        };
        Self::include_json_directory(
            &mut registry,
            directory
        )?;
        Ok(registry)
    }

    #[cfg(feature = "include_dir")]
    pub fn from_directory(canonical: LanguageTag, directory: Dir) -> Result<TranslationRegistry, RegistryError> {
        let mut registry = Self {
            translations: HashMap::with_capacity(
                directory.files().count()
            ),
            canonical,
        };
        Self::include_directory(
            &mut registry,
            directory
        )?;
        Ok(registry)
    }
}

/// Test fixture: a registry whose canonical language is `en-US`, holding
/// `en-US`, `en-GB`, `fr-FR` and `ja-JP` translations.
///
/// The two English translations define three keys; the French and Japanese
/// ones define only two (they lack `spelling_of_gray`). The individual
/// translations are returned alongside the registry, in the order
/// `en-US`, `en-GB`, `fr-FR`, `ja-JP`.
#[cfg(test)]
pub fn get_dummy_registry() -> (TranslationRegistry, Translation, Translation, Translation, Translation) {
    /// Builds a translation for `tag` from literal key/value pairs.
    fn build(tag: &str, entries: &[(&str, &str)]) -> Translation {
        Translation::new(
            LanguageTag::parse(tag).unwrap().canonicalize().unwrap(),
            HashMap::from_iter(
                entries.iter().map(|(key, value)| (key.to_string(), value.to_string()))
            ),
        )
    }

    let en_us = build("en-US", &[
        ("internal_server_error", "Internal Server Error"),
        ("not_authorized", "Unauthorized"),
        ("spelling_of_gray", "Gray"),
    ]);
    let en_gb = build("en-GB", &[
        ("internal_server_error", "Internal Server Error"),
        ("not_authorized", "Unauthorized"),
        ("spelling_of_gray", "Grey"),
    ]);
    let fr_fr = build("fr-FR", &[
        ("internal_server_error", "Erreur Interne du Serveur"),
        ("not_authorized", "Non autorisé"),
    ]);
    let ja_jp = build("ja-JP", &[
        ("internal_server_error", "内部サーバーエラー"),
        ("not_authorized", "未認証"),
    ]);

    let registry = TranslationRegistry::new(
        en_us.language().clone(),
        vec![en_us.clone(), en_gb.clone(), fr_fr.clone(), ja_jp.clone()],
    );
    (registry, en_us, en_gb, fr_fr, ja_jp)
}

/// Pins the order produced by `get_optimal_translations` for a header that
/// requests `ja-JP` at full weight and both English variants at `q=0.75`.
#[test]
pub fn test_get_optimal_translations() {
    use crate::accept_language::parse_accept_language_header;

    let (registry, en_us, en_gb, _fr_fr, ja_jp) = get_dummy_registry();
    let header = "ja-JP, en-GB;q=0.75, en-US;q=0.75";
    let weighted_languages = parse_accept_language_header(header).unwrap();

    // Drop the scores and unwrap the `Arc`s so the result can be compared
    // against the fixture translations directly.
    let ranked: Vec<Translation> = registry
        .get_optimal_translations(&weighted_languages)
        .unwrap()
        .into_iter()
        .map(|WeightedTranslation(translation, _)| (*translation).clone())
        .collect();

    assert_eq!(ranked, vec![ja_jp, en_us, en_gb]);
}