babel47 0.2.0

Multi-Language (BCP-47) Strings
Documentation
//! This crate helps with handling translatable strings using
//! [BCP 47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags.
//!
//! The [LangString] type collects strings for different language tags,
//! and allows retrieving by either a concrete language or a language range.
//!
//! ## RDF support through [Taganak](taganak_core)
//!
//! The `taganak` feature enables support for loading a [LangString] from
//! an RDF graph using the [Taganak SDK](taganak_core).

use language_tags::LanguageTag;
use std::collections::HashMap;

#[cfg(feature = "taganak")]
use async_stream::stream;
#[cfg(feature = "taganak")]
use futures_util::stream::{Stream, TryStreamExt};
#[cfg(feature = "taganak")]
use std::sync::Arc;
#[cfg(feature = "taganak")]
use taganak_core::prelude::{GraphError, GraphView, Literal, Term, Triple};
#[cfg(feature = "taganak")]
use taganak_orm::{GraphORMError, GraphORMField};

/// A single translatable string
///
/// Holds at most one string per language tag, plus an optional default
/// string that [LangString::get] falls back to when no language matches.
#[derive(Debug, Default, Clone)]
pub struct LangString {
    /// Strings keyed by the language tag they were stored under
    langs: HashMap<LanguageTag, String>,
    /// Fallback string; [None] until one is set
    default: Option<String>,
}

impl LangString {
    /// Set the string for one language
    ///
    /// A string previously stored for the same tag is replaced.
    pub fn set(&mut self, language: LanguageTag, string: String) {
        self.langs.insert(language, string);
    }

    /// Set the default (fallback) string
    ///
    /// A previously stored default is replaced.
    pub fn set_default(&mut self, string: String) {
        self.default = Some(string);
    }

    /// Unset the string for one language
    ///
    /// Does nothing if no string is stored for that tag.
    pub fn unset(&mut self, language: &LanguageTag) {
        self.langs.remove(language);
    }

    /// Unset the default (fallback) string
    pub fn unset_default(&mut self) {
        self.default = None;
    }

    /// Retrieve the string for a language or language range
    ///
    /// If [Some] is passed, this method resolves the requested
    /// language into a string by:
    ///
    /// 1. Trying to look up the exact language tag
    /// 2. Trying to find a stored language range that matches the requested
    ///    tag; if several ranges match, the one with the most subtags wins
    ///    (ties are broken by the tag text, so the result is deterministic)
    /// 3. Falling back to the [default string](Self::set_default)
    /// 4. Returning [None] on failure
    ///
    /// If [None] is passed, [the default](Self::set_default) is returned.
    ///
    /// # Examples
    ///
    /// ```
    /// use babel47::LangString;
    ///
    /// let mut ls = LangString::default();
    /// ls.set("de".parse().unwrap(), "Schildkröte".to_string());
    /// ls.set("de-CH".parse().unwrap(), "Schildchrot".to_string());
    /// ls.set("fr".parse().unwrap(), "tortoise".to_string());
    ///
    /// assert_eq!(ls.get(Some(&"de-DE".parse().unwrap())), Some("Schildkröte"));
    /// assert_eq!(ls.get(Some(&"de-AT".parse().unwrap())), Some("Schildkröte"));
    /// assert_eq!(ls.get(Some(&"de-CH".parse().unwrap())), Some("Schildchrot"));
    /// assert_eq!(ls.get(Some(&"fr-FR".parse().unwrap())), Some("tortoise"));
    /// assert_eq!(ls.get(Some(&"es".parse().unwrap())), None);
    /// assert_eq!(ls.get(None), None);
    ///
    /// ls.set_default("Testudinata".to_string());
    /// assert_eq!(ls.get(Some(&"es".parse().unwrap())), Some("Testudinata"));
    /// assert_eq!(ls.get(None), Some("Testudinata"));
    /// ```
    pub fn get(&self, language: Option<&LanguageTag>) -> Option<&str> {
        let Some(language) = language else {
            return self.default.as_deref();
        };

        if let Some(string) = self.langs.get(language) {
            return Some(string.as_str());
        }

        self.langs
            .iter()
            // `is_language_range` must be checked first: only ranges may be
            // used as the left-hand side of `matches`.
            .filter(|(range, _)| range.is_language_range() && range.matches(language))
            // HashMap iteration order is arbitrary, so pick the winner by an
            // explicit ranking instead of "first match found".
            .max_by(|(a, _), (b, _)| {
                let (a, b) = (a.as_str(), b.as_str());
                a.split('-')
                    .count()
                    .cmp(&b.split('-').count())
                    .then_with(|| a.cmp(b))
            })
            .map(|(_, string)| string.as_str())
            .or(self.default.as_deref())
    }

    /// Load from an RDF graph using [taganak_core]
    ///
    /// The [LangString] is constructed by retrieving all objects
    /// the provided `subject` and `predicate` point to and collecting
    /// all [rdf:langString](https://www.w3.org/TR/rdf12-schema/#ch_langstring)
    /// and [rdf:dirLangString](https://www.w3.org/TR/rdf12-schema/#ch_dirlangstring)
    /// literals, plus any regular
    /// [xsd:string](https://www.w3.org/TR/xmlschema-2/#string) as
    /// the [default](Self::set_default).
    ///
    /// Objects that are not literals, literals of any other datatype, and
    /// language-typed literals that carry no language tag are ignored.
    ///
    /// # Warning about graph shape
    ///
    /// This method assumes that any language appears only once among all
    /// objects, and that only one default string exists. If any language or
    /// the default string exist more than once, the last string discovered
    /// on the graph will be used, which is unpredictable.
    ///
    /// # Errors
    ///
    /// Returns the [GraphError] reported by the graph when the object query
    /// fails or when reading from the resulting stream fails.
    ///
    /// # Examples
    ///
    /// ```
    /// use babel47::LangString;
    /// use taganak_core::prelude::Graph;
    /// use taganak_framework::lazy_graph;
    ///
    /// lazy_graph!(TEST_GRAPH, r#"
    /// @prefix eg: <http://example.com/> .
    ///
    /// eg:turtle
    ///   eg:name
    ///     "Schildkröte"@de, "Schildchrot"@de-CH, "tortoise"@fr,
    ///     "Testudinata" .
    /// "#, Some("http://example.com/"));
    ///
    /// # tokio_test::block_on(async {
    /// let ls = LangString::from_graph(
    ///     (*TEST_GRAPH).view().await,
    ///     &"<http://example.com/turtle>".try_into().unwrap(),
    ///     &"<http://example.com/name>".try_into().unwrap()
    /// ).await.unwrap();
    ///
    /// assert_eq!(ls.get(Some(&"de-DE".parse().unwrap())), Some("Schildkröte"));
    /// assert_eq!(ls.get(Some(&"de-AT".parse().unwrap())), Some("Schildkröte"));
    /// assert_eq!(ls.get(Some(&"de-CH".parse().unwrap())), Some("Schildchrot"));
    /// assert_eq!(ls.get(Some(&"fr-FR".parse().unwrap())), Some("tortoise"));
    /// assert_eq!(ls.get(Some(&"es".parse().unwrap())), Some("Testudinata"));
    /// # });
    /// ```
    #[cfg(feature = "taganak")]
    pub async fn from_graph<G>(
        graph: G,
        subject: &Term,
        predicate: &Term,
    ) -> Result<Self, GraphError>
    where
        G: GraphView + Clone,
    {
        // Limit handed to the object query.
        // NOTE(review): presumably caps the number of objects returned, so
        // subjects with more objects would be truncated — confirm against
        // `GraphView::objects`.
        const LIMIT: usize = 256;

        let mut lang_string = Self::default();

        let mut objects = graph
            .objects(Some(subject), Some(predicate), Some(LIMIT))
            .await?;

        while let Some(object) = objects.try_next().await? {
            let object: Arc<Term> = object;

            if !object.is_literal() {
                continue;
            }
            let literal = object.to_literal().expect("we just checked");

            match literal.datatype() {
                "http://www.w3.org/2001/XMLSchema#string" => {
                    lang_string.set_default(literal.lexical().to_string());
                }
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
                | "http://www.w3.org/1999/02/22-rdf-syntax-ns#dirLangString" => {
                    // The datatype alone does not prove a tag is present;
                    // skip a malformed literal instead of panicking.
                    if let Some(tag) = literal.language() {
                        lang_string.set(tag.clone(), literal.lexical().to_string());
                    }
                }
                _ => {}
            }
        }

        Ok(lang_string)
    }
}

#[cfg(feature = "taganak")]
impl GraphORMField for LangString {
    /// Produce one triple per stored string for the given subject and predicate
    ///
    /// The default string, if set, is emitted first as a literal without a
    /// language tag; every language-specific string follows as a literal
    /// carrying its language tag.
    ///
    /// # Panics
    ///
    /// Panics if `subject` or `predicate` is [None]. Polling the returned
    /// stream panics if constructing a literal or a triple fails.
    fn orm_triples(
        &self,
        subject: Option<&Term>,
        predicate: Option<&Term>,
    ) -> Result<impl Stream<Item = Arc<Triple>>, GraphORMError> {
        let subject = Arc::new(subject.cloned().unwrap());
        let predicate = Arc::new(predicate.cloned().unwrap());
        Ok(Box::pin(stream! {
            // Default first (no tag), then every tagged string, as one sequence.
            let untagged = self.default.iter().map(|text| (None::<LanguageTag>, text));
            let tagged = self.langs.iter().map(|(tag, text)| (Some(tag.clone()), text));

            for (tag, text) in untagged.chain(tagged) {
                let literal = Literal::new(text.clone(), None, tag, None).unwrap();
                yield Triple::new(
                    subject.clone(),
                    predicate.clone(),
                    Arc::new(Term::Literal(literal))
                ).unwrap();
            }
        }))
    }

    /// Load the field by delegating to [LangString::from_graph]
    ///
    /// `_object` is ignored. A failure reported by the graph is converted to
    /// [GraphORMError::FailedDeserialize] carrying the error's text.
    async fn orm_field_from_graph(
        graph: impl GraphView,
        subject: &Term,
        predicate: &Term,
        _object: Option<&Term>,
    ) -> Result<Self, GraphORMError>
    where
        Self: Sized,
    {
        let loaded = Self::from_graph(graph, subject, predicate).await;
        loaded.map_err(|error| GraphORMError::FailedDeserialize(error.to_string()))
    }
}