Struct rust_bert::pipelines::question_answering::QuestionAnsweringConfig

pub struct QuestionAnsweringConfig {
pub model_resource: Resource,
pub config_resource: Resource,
pub vocab_resource: Resource,
pub merges_resource: Option<Resource>,
pub device: Device,
pub model_type: ModelType,
pub lower_case: bool,
pub strip_accents: Option<bool>,
pub add_prefix_space: Option<bool>,
pub max_seq_length: usize,
pub doc_stride: usize,
pub max_query_length: usize,
pub max_answer_length: usize,
}
Configuration for question answering
Contains information regarding the model to load and device to place the model on.
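As a usage sketch of how this configuration is typically consumed (assuming the crate's `QuestionAnsweringModel` and `QaInput` types from the same module; the default configuration downloads a pretrained DistilBERT model fine-tuned on SQuAD on first use, so this requires network access and the libtorch backend):

```rust
use rust_bert::pipelines::question_answering::{
    QaInput, QuestionAnsweringConfig, QuestionAnsweringModel,
};

fn main() -> anyhow::Result<()> {
    // Default configuration: DistilBERT fine-tuned on SQuAD,
    // placed on CUDA if available.
    let config = QuestionAnsweringConfig::default();
    let qa_model = QuestionAnsweringModel::new(config)?;

    let input = QaInput {
        question: String::from("Where does Amy live?"),
        context: String::from("Amy lives in Amsterdam."),
    };

    // Return the single best answer per input, with a batch size of 32.
    let answers = qa_model.predict(&[input], 1, 32);
    println!("{:?}", answers);
    Ok(())
}
```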
Fields
model_resource: Resource - Model weights resource (default: pretrained DistilBERT model on SQuAD)
config_resource: Resource - Config resource (default: pretrained DistilBERT model on SQuAD)
vocab_resource: Resource - Vocab resource (default: pretrained DistilBERT model on SQuAD)
merges_resource: Option<Resource> - Merges resource (default: None)
device: Device - Device to place the model on (default: CUDA/GPU when available)
model_type: ModelType - Model type
lower_case: bool - Flag indicating if the model expects lower-cased input
strip_accents: Option<bool> - Flag indicating if the tokenizer should strip accents (normalization). Only used for BERT / ALBERT models
add_prefix_space: Option<bool> - Flag indicating if the tokenizer should add a whitespace before each tokenized input (needed for some Roberta models)
max_seq_length: usize - Maximum sequence length for the combined query and context
doc_stride: usize - Stride to apply if the context needs to be broken down due to its length. Represents the number of overlapping tokens between sliding windows.
max_query_length: usize - Maximum token length for the query
max_answer_length: usize - Maximum token length for the answer
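The doc_stride field interacts with max_seq_length and max_query_length: each sliding window over the context advances by the window size minus the overlap, so the stride must leave room to progress. A minimal sketch of that constraint (the helper name is illustrative, not part of the crate; special tokens are ignored for simplicity):

```rust
/// Returns true if a sliding window over the context is guaranteed to
/// advance on every step, i.e. each new window starts past the previous one.
fn stride_makes_progress(max_seq_length: usize, max_query_length: usize, doc_stride: usize) -> bool {
    // Tokens of context that fit in one window alongside the query.
    let context_window = max_seq_length.saturating_sub(max_query_length);
    // The window advances by (context_window - doc_stride) tokens per step,
    // so the overlap must be strictly smaller than the window itself.
    doc_stride < context_window
}

fn main() {
    // Defaults from this configuration: 384 / 64 / 128.
    assert!(stride_makes_progress(384, 64, 128));
    // A stride as large as the context window would never advance.
    assert!(!stride_makes_progress(384, 64, 320));
    println!("stride checks passed");
}
```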
Implementations
pub fn custom_new(
model_type: ModelType,
model_resource: Resource,
config_resource: Resource,
vocab_resource: Resource,
merges_resource: Option<Resource>,
lower_case: bool,
strip_accents: impl Into<Option<bool>>,
add_prefix_space: impl Into<Option<bool>>,
max_seq_length: impl Into<Option<usize>>,
doc_stride: impl Into<Option<usize>>,
max_query_length: impl Into<Option<usize>>,
max_answer_length: impl Into<Option<usize>>
) -> QuestionAnsweringConfig
Instantiate a new question answering configuration of the supplied type.

Arguments

- model_type - `ModelType` indicating the model type to load (must match the actual data to be loaded!)
- model_resource - The `Resource` pointing to the model to load (e.g. model.ot)
- config_resource - The `Resource` pointing to the model configuration to load (e.g. config.json)
- vocab_resource - The `Resource` pointing to the tokenizer's vocabulary to load (e.g. vocab.txt/vocab.json)
- merges_resource - An optional `Resource` (`Option<Resource>`) pointing to the tokenizer's merges file to load (e.g. merges.txt), needed only for Roberta
- lower_case - A `bool` indicating whether the tokenizer should lower-case all input (for lower-cased models)
- strip_accents - Optional `bool` indicating whether the tokenizer should strip accents (normalization). Only used for BERT / ALBERT models
- add_prefix_space - Optional `bool` indicating whether the tokenizer should add a whitespace before each tokenized input (needed for some Roberta models)
- max_seq_length - Optional maximum sequence token length to limit the memory footprint. If the context is too long, it will be processed with sliding windows. Defaults to 384.
- doc_stride - Optional stride to apply if a sliding window is required to process the input context. Represents the number of overlapping tokens between sliding windows. This should be lower than max_seq_length minus max_query_length (otherwise there is a risk of the sliding window not progressing). Defaults to 128.
- max_query_length - Optional maximum question token length. Defaults to 64.
- max_answer_length - Optional maximum token length for the extracted answer. Defaults to 15.
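A construction sketch using local files (the paths are placeholders; this assumes the `Resource::Local`/`LocalResource` variants of the crate's resource API, and a BERT checkpoint, which needs no merges file):

```rust
use std::path::PathBuf;

use rust_bert::pipelines::common::ModelType;
use rust_bert::pipelines::question_answering::QuestionAnsweringConfig;
use rust_bert::resources::{LocalResource, Resource};

fn main() {
    let config = QuestionAnsweringConfig::custom_new(
        ModelType::Bert,
        Resource::Local(LocalResource { local_path: PathBuf::from("model.ot") }),
        Resource::Local(LocalResource { local_path: PathBuf::from("config.json") }),
        Resource::Local(LocalResource { local_path: PathBuf::from("vocab.txt") }),
        None, // merges_resource: not needed for BERT
        true, // lower_case: a lower-cased checkpoint
        None, // strip_accents: keep the tokenizer default
        None, // add_prefix_space: keep the tokenizer default
        384,  // max_seq_length
        128,  // doc_stride
        64,   // max_query_length
        15,   // max_answer_length
    );
    println!("device: {:?}", config.device);
}
```

The `impl Into<Option<_>>` parameters accept either a bare value (as above) or `None` to fall back to the documented defaults.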
Trait Implementations
Returns the “default value” for a type.
Auto Trait Implementations
impl RefUnwindSafe for QuestionAnsweringConfig
impl Send for QuestionAnsweringConfig
impl Sync for QuestionAnsweringConfig
impl Unpin for QuestionAnsweringConfig
impl UnwindSafe for QuestionAnsweringConfig