sophia_interfaces/
tokenizer.rs

1
2use std::collections::HashMap;
3use std::fmt;
4use serde::{Serialize, Deserialize};
5use std::ffi::{CString, NulError};
6use std::os::raw::{c_char, c_uint};
7
8#[derive(Serialize, Deserialize)]
9pub struct TokenizedOutput {
10    pub processing_time_ms: u128,
11    pub total_tokens: usize,
12    pub tokens: Vec<OutputToken>,
13    pub mwe: Vec<OutputToken>
14}
15
16#[derive(Default, Serialize, Deserialize, Debug, Clone, Hash)]
17pub struct OutputToken {
18    pub word: String,
19    pub index: i32,
20    pub pos: String,
21    pub potential_pos: Vec<String>,
22    pub stem: String,
23    pub potential_stems: Vec<String>,
24    pub antecedent: Option<String>,
25    pub placement: String,
26    pub is_possessive: bool,
27    pub is_negative: bool,
28    pub synonyms: Vec<String>,
29    pub hypernyms: Vec<String>,
30    pub hyponyms: Vec<String>,
31    pub categories: Vec<String>,
32    pub ner: Vec<String>,
33    pub inner_word: String,
34    pub inner_value: String,
35    pub inner_unit: String
36}
37
38impl fmt::Display for OutputToken {
39    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40        if let Some(antecedent) = &self.antecedent {
41            write!(f, "{} ({}), antecedent: {}", self.word, self.pos, antecedent)
42        } else if self.pos.as_str() == "SYS" && !self.inner_word.is_empty() {
43            write!(f, "{} ({}), inner word: {}, value: {}, unit{}", self.word, self.pos, self.inner_word, self.inner_value, self.inner_unit)
44        } else {
45            write!(f, "{} ({})", self.word, self.pos)
46        }
47    }
48}
49
50#[derive(Default, Serialize, Deserialize, Debug, Clone, Hash)]
51pub struct OutputCategory {
52    pub fqn: String,
53    pub name: String,
54    pub words: Vec<String>
55}
56
57