// reductionml_core/parsers/text_parser.rs
use std::{io::BufRead, sync::Arc};
2
3use crate::{
4 error::Result, object_pool::Pool, parsers::ParsedFeature,
5 sparse_namespaced_features::SparseFeatures, Features, FeaturesType, Label, LabelType,
6};
7
8pub trait TextModeParserFactory {
9 type Parser: TextModeParser;
10 fn create(
11 &self,
12 features_type: FeaturesType,
13 label_type: LabelType,
14 hash_seed: u32,
15 num_bits: u8,
16 pool: Arc<Pool<SparseFeatures>>,
17 ) -> Self::Parser;
18}
19
/// Identifies a namespace found while parsing text input.
///
/// The type only ever holds a borrowed string slice, so it is cheap to copy
/// and is usable as a hash-map key (`Eq` + `Hash`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ParsedNamespaceInfo<'a> {
    /// An explicitly named namespace; the name is borrowed for `'a`.
    Named(&'a str),
    /// The default namespace, which carries no name.
    Default,
}
25
26pub trait TextModeParser: Sync {
27 fn get_next_chunk(
28 &self,
29 input: &mut dyn BufRead,
30 output_buffer: String,
31 ) -> Result<Option<String>>;
32 fn parse_chunk<'a>(&self, chunk: &str) -> Result<(Features<'a>, Option<Label>)>;
33 fn extract_feature_names<'a>(
34 &self,
35 chunk: &'a str,
36 ) -> Result<std::collections::HashMap<ParsedNamespaceInfo<'a>, Vec<ParsedFeature<'a>>>> {
37 todo!()
38 }
39}