pub struct BioTokenizerConfig {
pub max_length: Option<usize>,
pub include_special_tokens: bool,
pub tokenize_dna: bool,
pub tokenize_rna: bool,
pub tokenize_proteins: bool,
pub kmer_size: Option<usize>,
pub overlapping_kmers: bool,
pub preserve_case: bool,
pub handle_ambiguous: bool,
pub tokenize_structure: bool,
pub vocab_size: Option<usize>,
}
Expand description
Configuration for the biological sequence tokenizer.
Fields§
max_length: Option<usize> — Maximum sequence length
include_special_tokens: bool — Whether to include special bio tokens
tokenize_dna: bool — Whether to tokenize DNA sequences
tokenize_rna: bool — Whether to tokenize RNA sequences
tokenize_proteins: bool — Whether to tokenize protein sequences
kmer_size: Option<usize> — K-mer size for subsequence tokenization
overlapping_kmers: bool — Whether to use overlapping k-mers
preserve_case: bool — Whether to preserve case (for mixed case sequences)
handle_ambiguous: bool — Whether to handle ambiguous nucleotides/amino acids
tokenize_structure: bool — Whether to tokenize secondary structure annotations
vocab_size: Option<usize> — Vocabulary size limit
Trait Implementations§
Source§impl Clone for BioTokenizerConfig
impl Clone for BioTokenizerConfig
Source§fn clone(&self) -> BioTokenizerConfig
fn clone(&self) -> BioTokenizerConfig
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for BioTokenizerConfig
impl Debug for BioTokenizerConfig
Source§impl Default for BioTokenizerConfig
impl Default for BioTokenizerConfig
Source§impl<'de> Deserialize<'de> for BioTokenizerConfig
impl<'de> Deserialize<'de> for BioTokenizerConfig
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
Auto Trait Implementations§
impl Freeze for BioTokenizerConfig
impl RefUnwindSafe for BioTokenizerConfig
impl Send for BioTokenizerConfig
impl Sync for BioTokenizerConfig
impl Unpin for BioTokenizerConfig
impl UnsafeUnpin for BioTokenizerConfig
impl UnwindSafe for BioTokenizerConfig
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> ConfigSerializable for T where
T: Serialize + for<'de> Deserialize<'de>,
impl<T> ConfigSerializable for T where
T: Serialize + for<'de> Deserialize<'de>,
Source§fn save_to_file(&self, path: &Path) -> Result<(), TrustformersError>
fn save_to_file(&self, path: &Path) -> Result<(), TrustformersError>
Save to file
Source§fn load_from_file(path: &Path) -> Result<Self, TrustformersError> where
Self: Sized,
fn load_from_file(path: &Path) -> Result<Self, TrustformersError> where
Self: Sized,
Load from file
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more