rust_bert/pipelines/translation/mod.rs
1//! # Translation pipeline
2//!
3//! Pipeline and utilities to perform translation from a source language to a target language. Multiple model architectures
4//! (Marian, T5, MBart or M2M100) are supported, offering a wide range of model sizes and multilingual capabilities.
5//! A high number of configuration options exist, including:
6//! - Model type
7//! - Model resources (weights, tokenizer and configuration files)
8//! - Set of source languages and target languages supported by the model (if multilingual). This should be an array-like of `Language`, with presets existing for the pretrained models registered in this library
9//! - Device placement (CPU or CUDA)
10//!
11//! The user may provide these inputs directly by creating an adequate `TranslationConfig`; examples are provided in
12//! `examples/translation_marian.rs` or `examples/translation_m2m100.rs`. A `TranslationModel` is created from the `TranslationConfig`
13//! and takes input text with optional source/target languages to perform translation. Models supporting a single source/target language pair
14//! do not require further specification. Multilingual models with multiple possible output languages require specifying the target language to translate to.
15//! For models with multiple possible source languages, the source language must be specified for M2M100 and MBart models (it is optional for Marian models).
16//!
17//! ```no_run
18//! use rust_bert::m2m_100::{
19//! M2M100ConfigResources, M2M100MergesResources, M2M100ModelResources, M2M100SourceLanguages,
20//! M2M100TargetLanguages, M2M100VocabResources,
21//! };
22//! use rust_bert::pipelines::common::ModelType;
23//! use rust_bert::pipelines::translation::{Language, TranslationConfig, TranslationModel};
24//! use rust_bert::resources::RemoteResource;
25//! use tch::Device;
26//!
27//! fn main() -> anyhow::Result<()> {
28//! use rust_bert::pipelines::common::ModelResource;
29//! let model_resource = RemoteResource::from_pretrained(M2M100ModelResources::M2M100_418M);
30//! let config_resource = RemoteResource::from_pretrained(M2M100ConfigResources::M2M100_418M);
31//! let vocab_resource = RemoteResource::from_pretrained(M2M100VocabResources::M2M100_418M);
32//! let merges_resource = RemoteResource::from_pretrained(M2M100MergesResources::M2M100_418M);
33//!
34//! let source_languages = M2M100SourceLanguages::M2M100_418M;
35//! let target_languages = M2M100TargetLanguages::M2M100_418M;
36//!
37//! let translation_config = TranslationConfig::new(
38//! ModelType::M2M100,
39//! ModelResource::Torch(Box::new(model_resource)),
40//! config_resource,
41//! vocab_resource,
42//! Some(merges_resource),
43//! source_languages,
44//! target_languages,
45//! Device::cuda_if_available(),
46//! );
47//! let model = TranslationModel::new(translation_config)?;
48//! let source_sentence = "This sentence will be translated in multiple languages.";
49//!
50//! let mut outputs = Vec::new();
51//! outputs.extend(model.translate(&[source_sentence], Language::English, Language::French)?);
52//! outputs.extend(model.translate(
53//! &[source_sentence],
54//! Language::English,
55//! Language::Spanish,
56//! )?);
57//! outputs.extend(model.translate(&[source_sentence], Language::English, Language::Hindi)?);
58//!
59//! for sentence in outputs {
60//! println!("{}", sentence);
61//! }
62//! Ok(())
63//! }
64//! ```
65//!
66//! The scenario above requires the user to know the kind of model to be used for translation. In order to facilitate the selection of
67//! an efficient configuration, a model builder is also available, allowing the user to specify a flexible number of constraints and returning a
68//! recommended model that fulfills the provided requirements. An example of using such a `TranslationModelBuilder` is given below:
69//!
70//! ```no_run
71//! use rust_bert::pipelines::translation::{Language, TranslationModelBuilder};
72//! fn main() -> anyhow::Result<()> {
73//! let model = TranslationModelBuilder::new()
74//! .with_source_languages(vec![Language::English])
75//! .with_target_languages(vec![Language::Spanish, Language::French, Language::Italian])
76//! .create_model()?;
77//!
78//! let input_context_1 = "This is a sentence to be translated";
79//! let input_context_2 = "The dog did not wake up.";
80//!
81//! let output =
82//! model.translate(&[input_context_1, input_context_2], None, Language::Spanish)?;
83//!
84//! for sentence in output {
85//! println!("{}", sentence);
86//! }
87//! Ok(())
88//! }
89//! ```
90
91mod translation_builder;
92mod translation_pipeline;
93
94pub use translation_pipeline::{Language, TranslationConfig, TranslationModel, TranslationOption};
95
96pub use translation_builder::TranslationModelBuilder;