1#![crate_name = "jsonnlp"]
2
3
4use serde_json;
20use serde;
21use serde::{Serialize, Deserialize};
22use std::error::Error;
23use std::fs::File;
24use std::io::BufReader;
25use std::path::Path;
26
27#[derive(Serialize, Deserialize)]
30pub struct Meta {
31 #[serde(rename = "DC.conformsTo")]
32 #[serde(skip_serializing_if = "String::is_empty")]
33 conforms_to: String,
34 #[serde(rename = "DC.author")]
35 #[serde(skip_serializing_if = "String::is_empty")]
36 author: String,
37 #[serde(rename = "DC.created")]
38 #[serde(skip_serializing_if = "String::is_empty")]
39 created: String,
40 #[serde(rename = "DC.date")]
41 #[serde(skip_serializing_if = "String::is_empty")]
42 date: String,
43 #[serde(rename = "DC.source")]
44 #[serde(skip_serializing_if = "String::is_empty")]
45 source: String,
46 #[serde(rename = "DC.language")]
47 #[serde(skip_serializing_if = "String::is_empty")]
48 language: String,
49 #[serde(rename = "DC.creator")]
50 #[serde(skip_serializing_if = "String::is_empty")]
51 creator: String,
52 #[serde(rename = "DC.publisher")]
53 #[serde(skip_serializing_if = "String::is_empty")]
54 publisher: String,
55 #[serde(rename = "DC.title")]
56 #[serde(skip_serializing_if = "String::is_empty")]
57 title: String,
58 #[serde(rename = "DC.description")]
59 #[serde(skip_serializing_if = "String::is_empty")]
60 description: String,
61 #[serde(rename = "DC.identifier")]
62 #[serde(skip_serializing_if = "String::is_empty")]
63 identifier: String,
64}
65
66#[derive(Serialize, Deserialize)]
68pub struct TokenFeatures {
69 overt: bool,
70 stop: bool,
71 alpha: bool,
72 number: u8,
73 #[serde(skip_serializing_if = "String::is_empty")]
74 gender: String,
75 person: u8,
76 #[serde(skip_serializing_if = "String::is_empty")]
77 tense: String,
78 perfect: bool,
79 continuous: bool,
80 progressive: bool,
81 #[serde(skip_serializing_if = "String::is_empty")]
82 case: String,
83 human: bool,
84 animate: bool,
85 negated: bool,
86 countable: bool,
87 factive: bool,
88 counterfactive: bool,
89 irregular: bool,
90 #[serde(rename = "phrasalVerb")]
91 phrasalverb: bool,
92 #[serde(skip_serializing_if = "String::is_empty")]
93 mood: String,
94 foreign: bool,
95 #[serde(rename = "spaceAfter")]
96 spaceafter: bool,
97}
98
99#[derive(Serialize, Deserialize)]
101pub struct Token {
102 id: u64,
103 sentence_id: u64,
104 text: String,
105 lemma: String,
106 #[serde(skip_serializing_if = "String::is_empty")]
107 xpos: String,
108 xpos_prob: f64,
109 #[serde(skip_serializing_if = "String::is_empty")]
110 upos: String,
111 upos_prob: f64,
112 #[serde(skip_serializing_if = "String::is_empty")]
113 entity_iob: String,
114 #[serde(rename = "characterOffsetBegin")]
115 char_offset_begin: u64,
116 #[serde(rename = "characterOffsetEnd")]
117 char_offset_end: u64,
118 #[serde(skip_serializing_if = "String::is_empty")]
119 #[serde(rename = "propID")]
120 prop_id: String,
121 #[serde(rename = "propIDProbability")]
122 prop_id_prob: f64,
123 #[serde(rename = "frameID")]
124 frame_id: u64,
125 #[serde(rename = "frameIDProb")]
126 frame_id_prob: f64,
127 #[serde(rename = "wordNetID")]
128 wordnet_id: u64,
129 #[serde(rename = "wordNetIDProb")]
130 wordnet_id_prob: f64,
131 #[serde(rename = "verbNetID")]
132 verbnet_id: u64,
133 #[serde(rename = "verbNetIDProb")]
134 verbnet_id_prob: f64,
135 #[serde(skip_serializing_if = "String::is_empty")]
136 lang: String,
137 features: TokenFeatures,
138 #[serde(skip_serializing_if = "String::is_empty")]
139 shape: String,
140 #[serde(skip_serializing_if = "String::is_empty")]
141 entity: String,
142}
143
144#[derive(Serialize, Deserialize)]
146pub struct Sentence {
147 id: u64,
148 #[serde(rename = "tokenFrom")]
149 token_from: u64,
150 #[serde(rename = "tokenTo")]
151 token_to: u64,
152 tokens: Vec<u64>,
153 clauses: Vec<u64>,
154 #[serde(rename = "type")]
155 #[serde(skip_serializing_if = "String::is_empty")]
156 stype: String,
157 #[serde(skip_serializing_if = "String::is_empty")]
158 sentiment: String,
159 #[serde(rename = "sentimentProb")]
160 sentiment_prob: f64,
161}
162
163#[derive(Serialize, Deserialize)]
165pub struct Clause {
166 id: u64,
167 #[serde(rename = "sentenceId")]
168 sentence_id: u64,
169 #[serde(rename = "tokenFrom")]
170 token_from: u64,
171 #[serde(rename = "tokenTo")]
172 token_to: u64,
173 tokens: Vec<u64>,
174 main: bool,
175 gov: u64,
176 head: u64,
177 neg: bool,
178 #[serde(skip_serializing_if = "String::is_empty")]
179 tense: String,
180 #[serde(skip_serializing_if = "String::is_empty")]
181 mood: String,
182 perfect: bool,
183 continuous: bool,
184 #[serde(skip_serializing_if = "String::is_empty")]
185 aspect: String,
186 #[serde(skip_serializing_if = "String::is_empty")]
187 voice: String,
188 #[serde(skip_serializing_if = "String::is_empty")]
189 sentiment: String,
190 #[serde(rename = "sentimentProb")]
191 sentiment_prob: f64,
192}
193
194#[derive(Serialize, Deserialize)]
198pub struct Dependency {
199 lab: String,
200 gov: u64,
201 dep: u64,
202 prob: f64,
203}
204
205#[derive(Serialize, Deserialize)]
209pub struct DependencyTree {
210 #[serde(rename = "sentenceId")]
211 sentence_id: u64,
212 #[serde(skip_serializing_if = "String::is_empty")]
213 style: String,
214 dependencies: Vec<Dependency>,
215 prob: f64,
216}
217
218#[derive(Serialize, Deserialize)]
220pub struct CoreferenceRepresentantive {
221 tokens: Vec<u64>,
222 head: u64,
223}
224
225#[derive(Serialize, Deserialize)]
227pub struct CoreferenceReferents {
228 tokens: Vec<u64>,
229 head: u64,
230 prob: f64,
231}
232
233#[derive(Serialize, Deserialize)]
235pub struct Coreference {
236 id: u64,
237 representative: CoreferenceRepresentantive,
238 referents: Vec<CoreferenceReferents>,
239}
240
241#[derive(Serialize, Deserialize)]
243pub struct Scope {
244 id: u64,
245 gov: Vec<u64>,
246 dep: Vec<u64>,
247 terminals: Vec<u64>,
248}
249
250#[derive(Serialize, Deserialize)]
252pub struct ConstituentParse {
253 #[serde(rename = "sentenceId")]
254 sentence_id: u64,
255 #[serde(rename = "type")]
256 #[serde(skip_serializing_if = "String::is_empty")]
257 ctype: String,
258 #[serde(rename = "labeledBracketing")]
259 #[serde(skip_serializing_if = "String::is_empty")]
260 labeled_bracketing: String,
261 prob: f64,
262 scopes: Vec<Scope>,
263}
264
265#[derive(Serialize, Deserialize)]
267pub struct Expression {
268 id: u64,
269 #[serde(rename = "type")]
270 #[serde(skip_serializing_if = "String::is_empty")]
271 etype: String,
272 head: u64,
273 #[serde(skip_serializing_if = "String::is_empty")]
274 dependency: String,
275 #[serde(rename = "tokenFrom")]
276 token_from: u64,
277 #[serde(rename = "tokenTo")]
278 token_to: u64,
279 tokens: Vec<u64>,
280 prob: f64,
281}
282
283#[derive(Serialize, Deserialize)]
285pub struct Paragraph {
286 id: u64,
287 #[serde(rename = "tokenFrom")]
288 token_from: u64,
289 #[serde(rename = "tokenTo")]
290 token_to: u64,
291 tokens: Vec<u64>,
292 sentences: Vec<u64>,
293}
294
295#[derive(Serialize, Deserialize)]
297pub struct Attribute {
298 lab: String,
299 val: String,
300}
301
302#[derive(Serialize, Deserialize)]
304pub struct Entity {
305 id: u64,
306 #[serde(skip_serializing_if = "String::is_empty")]
307 label: String,
308 #[serde(rename = "type")]
309 #[serde(skip_serializing_if = "String::is_empty")]
310 etype: String,
311 #[serde(skip_serializing_if = "String::is_empty")]
312 url: String,
313 head: u64,
314 #[serde(rename = "tokenFrom")]
315 token_from: u64,
316 #[serde(rename = "tokenTo")]
317 token_to: u64,
318 tokens: Vec<u64>,
319 #[serde(rename = "tripleID")]
320 triple_id: u64,
321 #[serde(skip_serializing_if = "String::is_empty")]
322 sentiment: String,
323 #[serde(rename = "sentimentProb")]
324 sentiment_prob: f64,
325 count: u64,
326 attributes: Vec<Attribute>,
327}
328
329#[derive(Serialize, Deserialize)]
331pub struct Relation {
332 id: u64,
333 #[serde(skip_serializing_if = "String::is_empty")]
334 label: String,
335 #[serde(rename = "type")]
336 #[serde(skip_serializing_if = "String::is_empty")]
337 rtype: String,
338 #[serde(skip_serializing_if = "String::is_empty")]
339 url: String,
340 head: u64,
341 #[serde(rename = "tokenFrom")]
342 token_from: u64,
343 #[serde(rename = "tokenTo")]
344 token_to: u64,
345 tokens: Vec<u64>,
346 #[serde(skip_serializing_if = "String::is_empty")]
347 sentiment: String,
348 #[serde(rename = "sentimentProb")]
349 sentiment_prob: f64,
350 count: u64,
351 attributes: Vec<Attribute>,
352}
353
354#[derive(Serialize, Deserialize)]
356pub struct Triple {
357 id: u64,
358 #[serde(rename = "fromEntity")]
359 from_entity: u64,
360 #[serde(rename = "toEntity")]
361 to_entity: u64,
362 rel: u64,
363 #[serde(rename = "clauseID")]
364 clause_id: Vec<u64>,
365 #[serde(rename = "sentenceID")]
366 sentence_id: Vec<u64>,
367 directional: bool,
368 #[serde(rename = "eventID")]
369 event_id: u64,
370 #[serde(rename = "tempSeq")]
371 temp_seq: u64,
372 prob: f64,
373 syntactic: bool,
374 implied: bool,
375 presupposed: bool,
376 count: u64,
377}
378
379#[derive(Serialize, Deserialize)]
381pub struct Document {
382 meta: Meta,
383 id: u64,
384 #[serde(rename = "tokenList")]
385 token_list: Vec<Token>,
386 clauses: Vec<Clause>,
387 sentences: Vec<Sentence>,
388 paragraphs: Vec<Paragraph>,
389 #[serde(rename = "dependencyTrees")]
390 dependency_trees: Vec<DependencyTree>,
391 coreferences: Vec<Coreference>,
392 constituents: Vec<ConstituentParse>,
393 expressions: Vec<Expression>,
394 entities: Vec<Entity>,
395 relations: Vec<Relation>,
396 triples: Vec<Triple>,
397}
398
399#[derive(Serialize, Deserialize)]
401pub struct JSONNLP {
402 meta: Meta,
403 docs: Vec<Document>,
404}
405
406pub fn from_string(json: &str) -> Result<JSONNLP, Box<dyn Error>> {
408 let r = serde_json::from_str::<JSONNLP>(json).unwrap();
409 Ok(r)
410}
411
412pub fn from_file<P: AsRef<Path>>(path: P) -> Result<JSONNLP, Box<dyn Error>> {
414 let file = File::open(path)?;
415 let reader = BufReader::new(file);
416 let u = serde_json::from_reader(reader)?;
417 Ok(u)
418}
419
420pub fn get_json(j: &JSONNLP) -> Result<String, Box<dyn Error>> {
422 let r = serde_json::to_string(j).unwrap();
423 Ok(r)
424}