language-tag 0.9.0

Handle language tags as defined in BCP47 (e.g. 'en-US', 'fr-FR', 'zh-cmn-Hans-CN')
Documentation
use tags::*;

// Top-level rule. Alternatives are tried in order: a regular language tag,
// then a private-use tag, then one of the grandfathered literals.
pub languagetag -> LanguageTag
    = tag:langtag { LanguageTag::LangTag(tag) }
    / private:privateuse { LanguageTag::PrivateUse(private) }
    / legacy:grandfathered { LanguageTag::GrandFathered(legacy) }


// A regular language tag: a language followed by an optional script, region,
// variant list, extension list and private-use part, each introduced by a
// separator.
//
// The private-use part goes through `sepprivateuse` so that its separator is
// only consumed when a private-use subtag actually follows. A bare optional
// separator here would let a dangling one through (e.g. "en-" or "en-US-").
//
// The action fails when `LangTag::from_parts` returns an error.
pub langtag -> LangTag
    = lang:language
    sc:sepscript?
    reg:sepregion?
    variants:sepvariants?
    extensions:sepextensions?
    prvt:sepprivateuse?

    {?
        // Absent variant / extension lists are passed on as empty vectors.
        let v = variants.unwrap_or_else(Vec::new);
        let e = extensions.unwrap_or_else(Vec::new);

        LangTag::from_parts(lang, sc, reg, v, e, prvt).map_err(|_| "Extensions must be uniquely defined by their singleton." )
    }


// Language part of a tag: the main language subtag plus an optional list of
// extended language subtags. It must be followed by a separator or by the end
// of input (checked by lookahead, nothing is consumed).
pub language -> TagLanguage
    = primary:mainlang extended:sepextlang? (&separator / !.)
    {
        // No extended subtags means an empty list.
        let subtags: Vec<String> = extended.unwrap_or_else(Vec::new);
        TagLanguage::new(primary, subtags)
    }

// Main language subtag: two or three ASCII letters, returned as an owned String.
pub mainlang -> String
    = code:$(ascii_alphabetic*<2,3>) { String::from(code) }

// A single extended language subtag: exactly three ASCII letters that must be
// followed by a separator or the end of input (lookahead only).
pub extlang_unique -> String
    = code:$(ascii_alphabetic*<3>) (&separator / !.) { String::from(code) }

// Extended language subtags preceded by their leading separator.
sepextlang -> Vec<String>
    = separator subtags:extlang { subtags }

// One to three extended language subtags joined by separators. Inner failures
// are silenced and replaced by a single custom "expected" message.
pub extlang -> Vec<String>
    = #quiet<subtags:(extlang_unique **<1,3> separator) { subtags }>
    / #expected("Expecting language extension e.g. 'cmn'.")


// Script subtag preceded by a separator; it must end at a separator or at the
// end of input (lookahead only).
sepscript -> TagScript
    = separator found:script (&separator / !.) { found }

// Script subtag: exactly four ASCII letters, wrapped in a TagScript.
pub script -> TagScript
    = code:$(ascii_alphabetic*<4>) { TagScript::new(String::from(code)) }

// Region subtag preceded by a separator; it must end at a separator or at the
// end of input (lookahead only).
sepregion -> TagRegion
    = separator found:region (&separator / !.) { found }

// Region subtag: the alphabetic form is tried first, then the numeric form.
pub region -> TagRegion
    = alpha:region_alpha { alpha }
    / digits:region_digit { digits }

// Alphabetic region subtag: exactly two ASCII letters, wrapped in a TagRegion.
// Inner failures are silenced and replaced by a single custom "expected"
// message (the message previously contained the typo "Expectiong").
region_alpha -> TagRegion
    = #quiet<s:$(ascii_alphabetic*<2>) { TagRegion::new(s.to_owned()) }>
    / #expected("Expecting region e.g. 'FR'.")

// Numeric region subtag: exactly three ASCII digits, wrapped in a TagRegion.
// Inner failures are silenced and replaced by a single custom "expected"
// message.
region_digit -> TagRegion
    = #quiet<digits:$(ascii_digit*<3>) { TagRegion::new(String::from(digits)) }>
    / #expected("Expecting region e.g. '123'.")

// One or more variants joined by separators, preceded by a leading separator.
// The list must end at a separator or at the end of input (lookahead only).
sepvariants -> Vec<TagVariant>
    = separator list:(variant ++ separator) (&separator / !.) { list }

// Variant subtag: the long form (`variant_alpha`) is tried first, then the
// four-character form that starts with a digit (`variant_digit`).
pub variant -> TagVariant
    = variant_alpha
    / variant_digit

// Long variant subtag: five to eight ASCII alphanumeric characters, wrapped in
// a TagVariant.
//
// RFC 5646 defines `variant = 5*8alphanum / (DIGIT 3alphanum)`, so digits are
// allowed here as well (e.g. "1606nict"). Matching letters only rejected such
// variants. The rule keeps its historical name.
variant_alpha -> TagVariant
    = s:$(ascii_alphanum*<5,8>) { TagVariant::new(s.to_owned()) }

// Short variant subtag: one ASCII digit followed by exactly three ASCII
// alphanumeric characters, wrapped in a TagVariant.
variant_digit -> TagVariant
    = code:$(ascii_digit ascii_alphanum*<3>) { TagVariant::new(String::from(code)) }

// One extension subtag: two to eight ASCII alphanumeric characters.
extcontent -> String
    = chunk:$(ascii_alphanum*<2,8>) { String::from(chunk) }

// One or more extensions joined by separators, preceded by a leading
// separator. The list must end at a separator or at the end of input
// (lookahead only).
sepextensions -> Vec<TagExtension>
    = separator list:(extension ++ separator) (&separator / !.) { list }

// A single extension: a singleton character, a separator, then one or more
// extension subtags joined by separators.
pub extension -> TagExtension
    = key:$singleton separator subtags:(extcontent ++ separator)
    {?
        // The matched singleton text is turned into a char; an empty match is
        // reported as an error instead of panicking.
        key.chars()
            .next()
            .map(|c| TagExtension::new(c, subtags))
            .ok_or("The extension singleton is not well formed")
    }

// One private-use subtag: one to eight ASCII alphanumeric characters.
privcontent -> String
    = chunk:$(ascii_alphanum*<1,8>) { String::from(chunk) }

// Private-use part preceded by a separator; it must end at a separator or at
// the end of input (lookahead only).
sepprivateuse -> TagExtension
    = separator private:privateuse (&separator / !.) { private }

// Private-use part: the letter 'x', a separator, then one or more private-use
// subtags joined by separators.
//
// Both 'x' and 'X' are accepted: `singleton` excludes both cases, so an
// uppercase 'X' would otherwise match neither rule. The resulting
// TagExtension always carries the lowercase 'x' key.
pub privateuse -> TagExtension
    = [xX] separator tags:(privcontent ++ separator) { TagExtension::new('x', tags) }

// A grandfathered tag: one of the irregular or regular literals, returned as
// the matched text.
pub grandfathered -> String
    = matched:$(irregular / regular) { String::from(matched) }

// Regular grandfathered tags, matched as literal strings.
//
// PEG choice is ordered and does not backtrack once an alternative succeeds,
// so "zh-min-nan" must be listed before its prefix "zh-min". Otherwise
// "zh-min" wins and the remaining "-nan" makes the overall parse fail.
regular
    = "art-lojban"
        / "cel-gaulish"
        / "no-bok"
        / "no-nyn"
        / "zh-guoyu"
        / "zh-hakka"
        / "zh-min-nan"
        / "zh-min"
        / "zh-xiang"

// Irregular grandfathered tags, matched as literal strings. No entry is a
// prefix of another, so the order of the alternatives does not affect matching.
irregular
    = "en-GB-oed"
    / "i-ami"
    / "i-bnn"
    / "i-default"
    / "i-enochian"
    / "i-hak"
    / "i-klingon"
    / "i-lux"
    / "i-mingo"
    / "i-navajo"
    / "i-pwn"
    / "i-tao"
    / "i-tay"
    / "i-tsu"
    / "sgn-BE-FR"
    / "sgn-BE-NL"
    / "sgn-CH-DE"

// Extension singleton: a single ASCII digit or letter other than 'x' / 'X',
// which is reserved for the private-use part (see `privateuse`).
singleton
    = [0-9A-WY-Za-wy-z]

// Subtag separator: either a hyphen or an underscore.
separator
    = [-_]

// A single ASCII decimal digit.
ascii_digit
    = [0-9]

// A single ASCII letter, lowercase or uppercase.
ascii_alphabetic
    = [a-zA-Z]

// A single ASCII digit or letter (either case).
ascii_alphanum
    = [0-9a-zA-Z]