1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufReader, Error, ErrorKind};
use std::path::{Path, PathBuf};

use serde::{Deserialize, Serialize};
use serde_json::{self, Value};
/// A labelled text document together with its term-frequency table.
///
/// Built from SPDX-style license JSON (see `parse_from_file`); `tf` starts
/// empty and is populated by `Document::add_tf`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Document {
// Caller-assigned identifier (callers in this file always pass 0).
pub id: usize,
// Human-readable label; filled from the JSON "licenseId" field.
pub label: String,
// Full document body; filled from the JSON "licenseText" field.
pub text: String,
// token -> occurrence count, maintained by `add_tf`.
pub tf: HashMap<String, u32>
}
impl Document {
    /// Creates a new document with an empty term-frequency table.
    pub fn new(id: usize, label: String, text: String) -> Document {
        Document {
            // Field-init shorthand instead of `id: id` etc.
            id,
            label,
            text,
            tf: HashMap::new(),
        }
    }

    /// Counts every token in `tokens` into the term-frequency table `tf`.
    ///
    /// Accepts any slice of strings; existing callers passing `&Vec<String>`
    /// keep working via deref coercion.
    pub fn add_tf(&mut self, tokens: &[String]) {
        for token in tokens {
            // entry API: one hash lookup per token, zero-init on first sight.
            *self.tf.entry(token.clone()).or_insert(0) += 1;
        }
    }
}
pub fn from_json_file(file_path: PathBuf) -> Result<Document, Error> {
if let Ok(file) = File::open(file_path) {
parse_from_file(file)
} else {
Err(Error::new(ErrorKind::NotFound, "No such file"))
}
}
pub fn parse_from_file(file: File) -> Result<Document, Error> {
let rdr = BufReader::new(file);
let val:Value = serde_json::from_reader(rdr).expect("Failed to parse the file");
let lic = val.as_object().expect("Failed to unpack JSON hashmap");
let temp_doc = Document::new(
0,
lic["licenseId"].as_str().unwrap_or("").to_string(),
lic["licenseText"].as_str().unwrap_or("").to_string()
);
Ok(temp_doc)
}
/// Parses every entry in the directory `path` into a `Document`.
///
/// # Errors
///
/// Fails on the first entry that cannot be listed, read, or parsed;
/// documents parsed before the failure are discarded. The failing path is
/// reported on stderr before the error is returned.
pub fn read_folder(path: &Path) -> Result<Vec<Document>, Error> {
    let mut docs: Vec<Document> = Vec::new();
    // Propagate directory-read failures with `?` instead of panicking via
    // `expect`, and surface per-entry errors instead of silently skipping
    // them (the old `if let Ok(..)` dropped them on the floor).
    for entry in path.read_dir()? {
        let entry = entry?;
        match from_json_file(entry.path()) {
            Ok(doc) => docs.push(doc),
            Err(e) => {
                // Diagnostic output belongs on stderr, not stdout.
                eprintln!("Failed to parse: {:?}", entry.path());
                return Err(e);
            }
        }
    }
    Ok(docs)
}