// rust_bert/models/nllb/mod.rs

1use crate::{
2    m2m_100::M2M100Generator,
3    pipelines::translation::Language::{self, *},
4};
5
/// Namespace type for the NLLB model weight resources (`rust_model.ot`).
pub struct NLLBResources;

/// Namespace type for the NLLB model configuration resources (`config.json`).
pub struct NLLBConfigResources;

/// Namespace type for the NLLB tokenizer vocabulary resources (`tokenizer.json`).
pub struct NLLBVocabResources;

/// Namespace type for the NLLB SentencePiece model resources (`sentencepiece.bpe.model`).
pub struct NLLBMergeResources;

/// Namespace type for the table of languages listed for the NLLB models.
pub struct NLLBLanguages;

/// Namespace type for the NLLB special-token map resources (`special_tokens_map.json`).
pub struct NLLBSpecialMap;

13impl NLLBResources {
14    pub const NLLB_600M_DISTILLED: (&'static str, &'static str) = (
15        "nllb200-distilled-600m/model",
16        "https://huggingface.co/datasets/vpermilp/nllb-200-distilled-600M-rust/resolve/main/rust_model.ot",
17    );
18
19    pub const NLLB_1_3B: (&'static str, &'static str) = (
20        "nllb200-1_3b/model",
21        "https://huggingface.co/datasets/vpermilp/nllb-200-1.3B-rust/resolve/main/rust_model.ot",
22    );
23}
24
25impl NLLBConfigResources {
26    pub const NLLB_600M_DISTILLED: (&'static str, &'static str) = (
27        "nllb200-distilled-600m/config",
28        "https://huggingface.co/datasets/vpermilp/nllb-200-distilled-600M-rust/raw/main/config.json",
29    );
30
31    pub const NLLB_1_3B: (&'static str, &'static str) = (
32        "nllb200-1_3b/config",
33        "https://huggingface.co/datasets/vpermilp/nllb-200-1.3B-rust/raw/main/config.json",
34    );
35}
36
37impl NLLBVocabResources {
38    pub const NLLB_600M_DISTILLED: (&'static str, &'static str) = (
39        "nllb200-distilled-600m/vocab",
40        "https://huggingface.co/datasets/vpermilp/nllb-200-distilled-600M-rust/resolve/main/tokenizer.json",
41    );
42
43    pub const NLLB_1_3B: (&'static str, &'static str) = (
44        "nllb200-1_3b/vocab",
45        "https://huggingface.co/datasets/vpermilp/nllb-200-1.3B-rust/resolve/main/tokenizer.json",
46    );
47}
48
49impl NLLBMergeResources {
50    pub const NLLB_600M_DISTILLED: (&'static str, &'static str) = (
51        "nllb200-distilled-600m/merge",
52        "https://huggingface.co/datasets/vpermilp/nllb-200-distilled-600M-rust/resolve/main/sentencepiece.bpe.model",
53    );
54
55    pub const NLLB_1_3B: (&'static str, &'static str) = (
56        "nllb200-1_3b/merge",
57        "https://huggingface.co/datasets/vpermilp/nllb-200-1.3B-rust/resolve/main/sentencepiece.bpe.model",
58    );
59}
60
61impl NLLBSpecialMap {
62    pub const NLLB_600M_DISTILLED: (&'static str, &'static str) = (
63        "nllb200-distilled-600m/special",
64        "htps://huggingface.co/datasets/vpermilp/nllb-200-distilled-600M-rust/raw/main/special_tokens_map.json",
65    );
66
67    pub const NLLB_1_3B: (&'static str, &'static str) = (
68        "nllb200-1_3b/special",
69        "https://huggingface.co/datasets/vpermilp/nllb-200-1.3B-rust/raw/main/special_tokens_map.json",
70    );
71}
72
73impl NLLBLanguages {
74    #[rustfmt::skip]
75    pub const NLLB: [Language; 201] = [
76        Afrikaans, Danish, Dutch, German, English, Icelandic, Luxembourgish, Swedish,
77        Asturian, Catalan, French, Galician, Italian, Occitan, Portuguese, Romanian, Spanish,
78        Belarusian, Bosnian, Bulgarian, Croatian, Czech, Macedonian, Polish, Russian, Serbian, Slovak,
79        Slovenian, Ukrainian, Estonian, Finnish, Hungarian, Latvian, Lithuanian, Albanian,
80        Armenian, Georgian, Greek, Irish, ScottishGaelic, Welsh, Bashkir, Kazakh,
81        Turkish, Uzbek, Japanese, Korean, Vietnamese, Bengali, Gujarati, Hindi, Kannada,
82        Marathi, Oriya, Panjabi, Sindhi, Sinhala, Urdu, Tamil, Cebuano, Iloko, Indonesian,
83        Javanese, Malay, Malayalam, Sundanese, Tagalog, Burmese, CentralKhmer, Lao, Thai, Hebrew, Amharic,
84        Hausa, Igbo, Lingala, Luganda, NorthernSotho, Somali, Swahili, Swati, Tswana, Wolof, Xhosa,
85        Yoruba, Zulu, HaitianCreole, Achinese, MesopotamianArabic, TaizziAdeniArabic, TunisianArabic, SouthLevantineArabic, Akan, NorthLevantineArabic, Arabic,
86        NajdiArabic, MoroccanArabic, EgyptianArabic, Assamese, Awadhi, CentralAymara, SouthAzerbaijani, NorthAzerbaijani, Bambara, Balinese,
87        Bemba, Bhojpuri, Banjar, Tibetan, Buginese, Chokwe, CentralKurdish, CrimeanTatar, SouthwesternDinka, Dyula, Dzongkha,
88        Esperanto, Basque, Ewe, Faroese, Fijian, Fon, Friulian, NigerianFulfulde, WestCentralOromo, Guarani, Haitian,
89        Chhattisgarhi, Kabyle, Kachin, Kamba, Kashmiri, Kabiye, Kabuverdianu, HalhMongolian, Khmer, Kikuyu,
90        Kinyarwanda, Kirghiz, Kimbundu, NorthernKurdish, CentralKanuri, Kongo, Ligurian, Limburgan, Lombard, Latgalian,
91        LubaLulua, Ganda, Luo, Lushai, Magahi, Maithili, Minangkabau, Maltese, Manipuri, Mossi,
92        Maori, NorwegianNynorsk, NorwegianBokmal, Pedi, Nuer, Nyanja, Odia, Pangasinan, Papiamento, SouthernPashto, IranianPersian,
93        PlateauMalagasy, Dari, AyacuchoQuechua, Rundi, Sango, Sanskrit, Santali, Sicilian, Shan, Samoan,
94        Shona, SouthernSotho, Sardinian, Silesian, Tamasheq, Tatar, Telugu, Tajik, Tigrinya, TokPisin,
95        Tsonga, Turkmen, Tumbuka, Twi, CentralAtlasTamazight, Uighur, Umbundu, NorthernUzbek, Venetian, Waray,
96        EasternYiddish, YueChinese, Chinese,
97    ];
98}
99
100pub type NLLBGenerator = M2M100Generator;