1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
use std::error;
use std::fmt;
use std::fs::File;
use std::io;
use std::path::{Path, PathBuf};
use std::sync::{RwLock};
use serde_json::{self as json};
use klpair::KLPair;
use language::{Corpus, Language, mins, tag};
use exception::{Exceptions};
use pattern::{Patterns};
// Process-wide folder in which the hyphenation data files are looked up.
// It starts out as an empty path, is written by `set_pattern_folder` and is
// read by `data_file`; the `RwLock` guards those accesses.
lazy_static! {
static ref PATTERN_FOLDER: RwLock<PathBuf> = RwLock::new(PathBuf::new());
}
/// Sets the folder from which pattern and exception files are loaded.
///
/// The previously stored folder is replaced by `path`, so the function can
/// be called repeatedly; only the most recent call takes effect.
///
/// # Panics
///
/// Panics if the lock guarding the folder has been poisoned.
pub fn set_pattern_folder(path: &Path) {
    let mut folder = PATTERN_FOLDER.write().unwrap();
    // Assign instead of `push`: pushing a relative path would append it to
    // whatever folder an earlier call had stored rather than replacing it.
    *folder = path.to_path_buf();
}
pub fn data_file(lang: Language, suffix: &str) -> io::Result<File> {
let fname = format!("hyph-{}.{}.json", tag(lang), suffix);
let as_set = PATTERN_FOLDER.read().unwrap();
let mut fpath = PathBuf::new();
fpath.push(&*as_set);
fpath.push(fname);
File::open(fpath)
}
pub fn patterns(lang: Language) -> Result<Vec<KLPair>, Error> {
let f = try!(data_file(lang, "pat"));
let pairs: Vec<(String, Vec<u32>)> = try!(json::from_reader(f));
Ok(pairs)
}
pub fn exceptions(lang: Language) -> Result<Vec<KLPair>, Error> {
let f = try!(data_file(lang, "hyp"));
let pairs: Vec<(String, Vec<u32>)> = try!(json::from_reader(f));
Ok(pairs)
}
pub fn language(lang: Language) -> Result<Corpus, Error> {
let (l, r) = mins(lang);
let pat_pairs = try!(patterns(lang));
let ex_pairs = try!(exceptions(lang));
let mut ps = Patterns::empty();
for p in pat_pairs {
ps.insert(p);
}
let mut exs = Exceptions::empty();
for ex in ex_pairs {
exs.insert(ex);
}
Ok(Corpus {
language: lang,
patterns: ps,
exceptions: if !exs.0.is_empty() { Some(exs) } else { None },
left_min: l,
right_min: r
})
}
/// Failure modes of loading pattern and exception data.
#[derive(Debug)]
pub enum Error {
/// An I/O error, e.g. the one returned when a data file cannot be opened.
IO(io::Error),
/// An error reported by the JSON reader while deserializing a data file.
Deserialization(json::Error)
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::IO(ref e) => e.fmt(f),
Error::Deserialization(ref e) => e.fmt(f)
}
}
}
impl error::Error for Error {
fn description(&self) -> &str {
match *self {
Error::IO(ref e) => e.description(),
Error::Deserialization(ref e) => e.description(),
}
}
}
impl From<io::Error> for Error {
fn from(err: io::Error) -> Error {
Error::IO(err)
}
}
impl From<json::Error> for Error {
fn from(err: json::Error) -> Error {
Error::Deserialization(err)
}
}