// reductionml_core/parsers/text_parser.rs

1use std::{io::BufRead, sync::Arc};
2
3use crate::{
4    error::Result, object_pool::Pool, parsers::ParsedFeature,
5    sparse_namespaced_features::SparseFeatures, Features, FeaturesType, Label, LabelType,
6};
7
8pub trait TextModeParserFactory {
9    type Parser: TextModeParser;
10    fn create(
11        &self,
12        features_type: FeaturesType,
13        label_type: LabelType,
14        hash_seed: u32,
15        num_bits: u8,
16        pool: Arc<Pool<SparseFeatures>>,
17    ) -> Self::Parser;
18}
19
/// Identifies the namespace a parsed feature belongs to.
///
/// Used as the key of the map returned by
/// [`TextModeParser::extract_feature_names`]. The type only ever holds a
/// borrowed string slice, so it is cheap to copy and is `Copy` as well as
/// `Clone`; `Debug` is derived so values can appear in diagnostics and
/// assertion failures.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ParsedNamespaceInfo<'a> {
    /// A namespace referred to by name; the name is borrowed for `'a`.
    Named(&'a str),
    /// The default namespace.
    // NOTE(review): presumably used when the input gives no namespace name;
    // confirm against the parser implementations.
    Default,
}
25
26pub trait TextModeParser: Sync {
27    fn get_next_chunk(
28        &self,
29        input: &mut dyn BufRead,
30        output_buffer: String,
31    ) -> Result<Option<String>>;
32    fn parse_chunk<'a>(&self, chunk: &str) -> Result<(Features<'a>, Option<Label>)>;
33    fn extract_feature_names<'a>(
34        &self,
35        chunk: &'a str,
36    ) -> Result<std::collections::HashMap<ParsedNamespaceInfo<'a>, Vec<ParsedFeature<'a>>>> {
37        todo!()
38    }
39}