1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
use std::{io::BufRead, sync::Arc};

use crate::{
    error::Result, object_pool::Pool, parsers::ParsedFeature,
    sparse_namespaced_features::SparseFeatures, Features, FeaturesType, Label, LabelType,
};

/// Factory for building a configured [`TextModeParser`].
///
/// Each implementation names the concrete parser it produces through the
/// associated [`Parser`](TextModeParserFactory::Parser) type.
pub trait TextModeParserFactory {
    /// The concrete parser type produced by [`create`](TextModeParserFactory::create).
    type Parser: TextModeParser;

    /// Builds a parser from the given configuration.
    ///
    /// * `features_type` - the kind of features the parser is expected to produce.
    /// * `label_type` - the kind of label the parser is expected to produce.
    /// * `hash_seed` - NOTE(review): presumably the seed used when hashing
    ///   feature/namespace names; confirm against implementations.
    /// * `num_bits` - NOTE(review): presumably the bit width of the hashed
    ///   feature index space; confirm against implementations.
    /// * `pool` - shared pool of [`SparseFeatures`] objects handed to the parser;
    ///   NOTE(review): presumably used to recycle allocations, confirm.
    fn create(
        &self,
        features_type: FeaturesType,
        label_type: LabelType,
        hash_seed: u32,
        num_bits: u8,
        pool: Arc<Pool<SparseFeatures>>,
    ) -> Self::Parser;
}

/// Identifies the namespace a parsed feature belongs to.
///
/// Used as the map key in the result of
/// `TextModeParser::extract_feature_names`, hence the `Eq` + `Hash` derives.
/// `Debug` is derived so values can be logged, used in `assert_eq!`, and
/// embedded in other types that derive `Debug`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ParsedNamespaceInfo<'a> {
    /// A namespace referred to by name; the name is borrowed for `'a`.
    Named(&'a str),
    /// The namespace used when no name is given.
    Default,
}

/// Parser for a text input format, split into a chunk-reading step and a
/// chunk-parsing step.
///
/// Implementations must be `Sync`; all methods take `&self`, so a single
/// parser can be shared by reference across threads.
pub trait TextModeParser: Sync {
    /// Reads the next chunk of text from `input`.
    ///
    /// `output_buffer` is taken by value and a `String` is handed back on
    /// success. NOTE(review): presumably the buffer is reused to hold the
    /// returned chunk and `Ok(None)` signals end of input; confirm against
    /// implementations.
    ///
    /// # Errors
    ///
    /// Returns the crate's error type when the implementation fails to
    /// produce a chunk.
    fn get_next_chunk(
        &self,
        input: &mut dyn BufRead,
        output_buffer: String,
    ) -> Result<Option<String>>;

    /// Parses one chunk into its features and, if present, its label.
    ///
    /// The lifetime `'a` of the returned [`Features`] is not tied to `chunk`
    /// in this signature, so the result does not borrow from the input text.
    ///
    /// # Errors
    ///
    /// Returns the crate's error type when the chunk cannot be parsed.
    fn parse_chunk<'a>(&self, chunk: &str) -> Result<(Features<'a>, Option<Label>)>;

    /// Collects the features found in `chunk`, grouped by namespace.
    ///
    /// Both the map keys and the [`ParsedFeature`] values share the lifetime
    /// of `chunk`, so the result may borrow directly from the input text.
    ///
    /// # Errors
    ///
    /// Returns the crate's error type when the chunk cannot be parsed.
    fn extract_feature_names<'a>(
        &self,
        chunk: &'a str,
    ) -> Result<std::collections::HashMap<ParsedNamespaceInfo<'a>, Vec<ParsedFeature<'a>>>>;
}