use syntaxdot_tokenizers::Tokenize;
use crate::encoders::NamedEncoder;
use crate::error::SyntaxDotError;
use crate::tensor::Tensors;
pub(crate) mod tensor_iter;
pub use conll::ConlluDataSet;
mod conll;
pub(crate) mod sentence_iter;
/// A data set of sentences that can be turned into batches of tensors.
///
/// Implementors (e.g. [`ConlluDataSet`]) consume themselves to produce an
/// iterator over tensor batches, tokenizing and (optionally) label-encoding
/// each sentence along the way.
pub trait DataSet<'a> {
    /// Iterator over batches; each item is a [`Tensors`] batch or a
    /// [`SyntaxDotError`] produced while reading/encoding the data.
    type Iter: Iterator<Item = Result<Tensors, SyntaxDotError>>;
    /// Return an iterator over batches of tensors built from this data set.
    ///
    /// * `tokenizer` — tokenizer used to split each sentence into pieces.
    /// * `encoders` — label encoders applied per sentence; `None` presumably
    ///   means no label tensors are produced (e.g. unlabeled data) — confirm
    ///   against implementors.
    /// * `batch_size` — number of sentences per batch.
    /// * `max_len` — maximum sequence length (in tokens or pieces, see
    ///   [`SequenceLength`]); NOTE(review): whether over-long sentences are
    ///   skipped or truncated is decided by the implementor — confirm.
    /// * `shuffle_buffer_size` — if set, shuffle sentences using a buffer of
    ///   this size before batching.
    ///
    /// Consumes `self`; errors during iterator construction are returned
    /// eagerly as `Err`.
    // NOTE(review): only 5 non-self parameters — this allow may be vestigial
    // from an earlier signature with more arguments.
    #[allow(clippy::too_many_arguments)]
    fn batches(
        self,
        tokenizer: &'a dyn Tokenize,
        encoders: Option<&'a [NamedEncoder]>,
        batch_size: usize,
        max_len: Option<SequenceLength>,
        shuffle_buffer_size: Option<usize>,
    ) -> Result<Self::Iter, SyntaxDotError>;
}
/// Unit in which a sequence-length limit is expressed.
///
/// Used by [`DataSet::batches`] to interpret `max_len`: the same numeric
/// limit can be applied either to tokens or to tokenizer pieces.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum SequenceLength {
    /// Length counted in tokens (pre-tokenization units).
    Tokens(usize),
    /// Length counted in pieces — presumably the subword pieces produced by
    /// the `Tokenize` implementation; confirm against the tokenizer crate.
    Pieces(usize),
}