oxirs-vec 0.2.4

Vector index abstractions for semantic similarity and AI-augmented querying
Documentation
//! # SentenceTransformerGenerator - simulate_wordpiece_tokenization_group Methods
//!
//! This module contains method implementations for `SentenceTransformerGenerator`.
//!
//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)

use super::sentencetransformergenerator_type::SentenceTransformerGenerator;

impl SentenceTransformerGenerator {
    /// Simulate WordPiece tokenization (used by BERT).
    ///
    /// Words whose UTF-8 encoding is at most 6 bytes long are mapped to a single
    /// token id via `word_to_token_id`. Longer words are split into two pieces
    /// near the byte midpoint: the leading piece is looked up as-is and the
    /// trailing piece is looked up with the `##` continuation prefix.
    ///
    /// The split point is always placed on a UTF-8 character boundary, so words
    /// containing multi-byte characters (accented Latin, CJK, emoji, ...) are
    /// split between characters instead of panicking on an invalid slice index.
    /// For ASCII words the split point is exactly `word.len() / 2`.
    ///
    /// # Parameters
    /// * `word` - the word to tokenize.
    /// * `vocab_size` - forwarded unchanged to `word_to_token_id`.
    ///
    /// # Returns
    /// A vector holding one token id (short words) or two token ids (long words).
    pub(super) fn simulate_wordpiece_tokenization(
        &self,
        word: &str,
        vocab_size: usize,
    ) -> Vec<u32> {
        // Threshold is intentionally measured in bytes, matching the existing
        // behaviour for every input that previously worked.
        if word.len() <= 6 {
            return vec![self.word_to_token_id(word, vocab_size)];
        }

        // Start at the byte midpoint and move forward to the next character
        // boundary. A UTF-8 character is at most 4 bytes, so this advances by at
        // most 3 bytes; because `word.len() >= 7` here, the resulting index is
        // strictly inside the word and both pieces are non-empty.
        // `is_char_boundary(word.len())` is true, so the loop always terminates.
        let mut mid = word.len() / 2;
        while !word.is_char_boundary(mid) {
            mid += 1;
        }

        let (head, tail) = word.split_at(mid);
        vec![
            self.word_to_token_id(head, vocab_size),
            self.word_to_token_id(&format!("##{}", tail), vocab_size),
        ]
    }
}