gliner/model/input/
text.rs

1use std::path::Path;
2use crate::util::result::Result;
3
/// Raw text input for the model: the text chunks to analyze, paired with the
/// entity classes to look for.
#[derive(Clone, Debug)]
pub struct TextInput {
    /// Text chunks to be analyzed.
    pub texts: Vec<String>,
    /// Entity classes.
    pub entities: Vec<String>,
}
10
11
12impl TextInput {
13
14    /// Default constructor that moves the input data given as a vector of the text 
15    /// sequences to be analyzed, and a vector of entity classes.
16    pub fn new(texts: Vec<String>, entities: Vec<String>) -> Result<Self> {
17        if texts.is_empty() || entities.is_empty() {
18            Err("invalid input: empty texts and/or entities".into())
19        }
20        else {
21            Ok(Self { texts, entities })
22        }
23    }
24
25    /// This constructor will mostly be used to test with plain arrays of static `str`s.
26    pub fn from_str(texts: &[&str], entities: &[&str]) -> Result<Self> {
27        Self::new(
28            texts.iter().map(|s| s.to_string()).collect(),
29            entities.iter().map(|s| s.to_string()).collect(),
30        )
31    }
32
33    /// For testing purposes. 
34    /// Panics if the specified column does not exist
35    pub fn new_from_csv<P: AsRef<Path>>(path: P, column: usize, limit: usize, entities: Vec<String>) -> Result<Self> {
36        let mut csv = csv::Reader::from_path(path)?;
37        let texts: Vec<String> = csv.records()
38            .take(limit)
39            .map(|r| r.unwrap().get(column).unwrap().to_string())
40            .collect();
41        Self::new(texts, entities)
42    }
43
44}