rust_transformers 0.2.0

# Copyright 2018 The HuggingFace Inc. team.
# Copyright 2019 Guillaume Becquin
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import tempfile
from pathlib import Path
import gc
from transformers.file_utils import get_from_cache
from transformers.tokenization_distilbert import DistilBertTokenizer
from rust_transformers import PyBertTokenizer
from transformers.modeling_distilbert import DistilBertForSequenceClassification
import torch
from timeit import default_timer as timer


class TestBenchmarkDistilBert:
    def setup_class(self):
        self.use_gpu = torch.cuda.is_available()
        self.test_dir = Path(tempfile.mkdtemp())

        self.base_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased', do_lower_case=True,
                                                                  cache_dir=self.test_dir)
        self.rust_tokenizer = PyBertTokenizer(
            get_from_cache(self.base_tokenizer.pretrained_vocab_files_map['vocab_file']['distilbert-base-uncased']))
        self.model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased',
                                                                         output_attentions=False).eval()
        if self.use_gpu:
            self.model.cuda()
        self.sentence_list = ['For instance, on the planet Earth, man had always assumed that he was more intelligent '
                              'than dolphins because he had achieved so much—the wheel, New York, wars and so on—whilst'
                              ' all the dolphins had ever done was muck about in the water having a good time. But '
                              'conversely, the dolphins had always believed that they were far more intelligent than '
                              'man—for precisely the same reasons.'] * 64

        # Pre-allocate GPU memory
        tokens_list = [self.base_tokenizer.tokenize(sentence) for sentence in self.sentence_list]
        features = [self.base_tokenizer.convert_tokens_to_ids(tokens) for tokens in tokens_list]
        features = [self.base_tokenizer.prepare_for_model(input, None, add_special_tokens=True, max_length=128) for
                    input
                    in features]
        all_input_ids = torch.tensor([f['input_ids'] for f in features], dtype=torch.long)

        if self.use_gpu:
            all_input_ids = all_input_ids.cuda()

        with torch.no_grad():
            _ = self.model(all_input_ids)[0].cpu().numpy()

    def setup_base_tokenizer(self):
        self.base_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased', do_lower_case=True,
                                                                  cache_dir=self.test_dir)

    def setup_rust_tokenizer(self):
        self.rust_tokenizer = PyBertTokenizer(
            get_from_cache(self.base_tokenizer.pretrained_vocab_files_map['vocab_file']['distilbert-base-uncased']))

    def baseline_batch(self):
        tokens_list = [self.base_tokenizer.tokenize(sentence) for sentence in self.sentence_list]
        features = [self.base_tokenizer.convert_tokens_to_ids(tokens) for tokens in tokens_list]
        features = [self.base_tokenizer.prepare_for_model(input,
                                                          None,
                                                          add_special_tokens=True,
                                                          max_length=128) for input in features]
        all_input_ids = torch.tensor([f['input_ids'] for f in features], dtype=torch.long)
        if self.use_gpu:
            all_input_ids = all_input_ids.cuda()
        with torch.no_grad():
            output = self.model(all_input_ids)[0].cpu().numpy()
        return output

    def rust_batch_single_threaded(self):
        features = [self.rust_tokenizer.encode(sentence,
                                               max_len=128,
                                               truncation_strategy='longest_first',
                                               stride=0) for sentence in self.sentence_list]
        all_input_ids = torch.tensor([f.token_ids for f in features], dtype=torch.long)
        if self.use_gpu:
            all_input_ids = all_input_ids.cuda()
        with torch.no_grad():
            output = self.model(all_input_ids)[0].cpu().numpy()
        return output

    def rust_batch_multi_threaded(self):
        features = self.rust_tokenizer.encode_list(self.sentence_list,
                                                   max_len=128,
                                                   truncation_strategy='longest_first',
                                                   stride=0)
        all_input_ids = torch.tensor([f.token_ids for f in features], dtype=torch.long)
        if self.use_gpu:
            all_input_ids = all_input_ids.cuda()
        with torch.no_grad():
            output = self.model(all_input_ids)[0].cpu().numpy()
        return output

    def test_distilbert_baseline(self):
        values = []
        for i in range(10):
            self.setup_base_tokenizer()
            t0 = timer()
            self.baseline_batch()
            t1 = timer()
            values.append((t1 - t0) * 1000)
        mean = sum(values) / len(values)
        std_dev = math.sqrt(sum([(value - mean) ** 2 for value in values])) / (len(values) - 1)
        print(f'baseline - mean: {mean:.2f}, std. dev: {std_dev:.2f}')

    def test_distilbert_rust_single_threaded(self):
        values = []
        for i in range(10):
            self.setup_rust_tokenizer()
            t0 = timer()
            self.rust_batch_single_threaded()
            t1 = timer()
            values.append((t1 - t0) * 1000)
        mean = sum(values) / len(values)
        std_dev = math.sqrt(sum([(value - mean) ** 2 for value in values])) / (len(values) - 1)
        print(f'rust single thread - mean: {mean:.2f}, std. dev: {std_dev:.2f}')

    def test_distilbert_rust_multi_threaded(self):
        values = []
        for i in range(10):
            self.setup_rust_tokenizer()
            t0 = timer()
            self.rust_batch_multi_threaded()
            t1 = timer()
            values.append((t1 - t0) * 1000)
        mean = sum(values) / len(values)
        std_dev = math.sqrt(sum([(value - mean) ** 2 for value in values])) / (len(values) - 1)
        print(f'rust multi threaded - mean: {mean:.2f}, std. dev: {std_dev:.2f}')

    def teardown_class(self):
        self.model = None
        self.base_tokenizer = None
        self.rust_tokenizer = None
        gc.collect()
        torch.cuda.empty_cache()