// sophia_interfaces/tokenizer.rs
2use std::collections::HashMap;
3use std::fmt;
4use serde::{Serialize, Deserialize};
5use std::ffi::{CString, NulError};
6use std::os::raw::{c_char, c_uint};
7
8#[derive(Serialize, Deserialize)]
9pub struct TokenizedOutput {
10 pub processing_time_ms: u128,
11 pub total_tokens: usize,
12 pub tokens: Vec<OutputToken>,
13 pub mwe: Vec<OutputToken>
14}
15
16#[derive(Default, Serialize, Deserialize, Debug, Clone, Hash)]
17pub struct OutputToken {
18 pub word: String,
19 pub index: i32,
20 pub pos: String,
21 pub potential_pos: Vec<String>,
22 pub stem: String,
23 pub potential_stems: Vec<String>,
24 pub antecedent: Option<String>,
25 pub placement: String,
26 pub is_possessive: bool,
27 pub is_negative: bool,
28 pub synonyms: Vec<String>,
29 pub hypernyms: Vec<String>,
30 pub hyponyms: Vec<String>,
31 pub categories: Vec<String>,
32 pub ner: Vec<String>,
33 pub inner_word: String,
34 pub inner_value: String,
35 pub inner_unit: String
36}
37
38impl fmt::Display for OutputToken {
39 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40 if let Some(antecedent) = &self.antecedent {
41 write!(f, "{} ({}), antecedent: {}", self.word, self.pos, antecedent)
42 } else if self.pos.as_str() == "SYS" && !self.inner_word.is_empty() {
43 write!(f, "{} ({}), inner word: {}, value: {}, unit{}", self.word, self.pos, self.inner_word, self.inner_value, self.inner_unit)
44 } else {
45 write!(f, "{} ({})", self.word, self.pos)
46 }
47 }
48}
49
50#[derive(Default, Serialize, Deserialize, Debug, Clone, Hash)]
51pub struct OutputCategory {
52 pub fqn: String,
53 pub name: String,
54 pub words: Vec<String>
55}
56
57