1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
use std::cmp::Ordering;
use opencorpora::OpencorporaTagReg;
use prelude::*;
/// Estimator that ranks tags and parses of a single word by a per-word tag
/// probability looked up in the analyzer's dictionary
/// (`morph.dict.p_t_given_w`).
///
/// The struct has no fields; every method receives the `MorphAnalyzer` it
/// should consult as an argument, and `self` is taken by value (the type is
/// `Copy`).
#[derive(Debug, Clone, Copy)]
pub struct SingleTagProbabilityEstimator {}
impl SingleTagProbabilityEstimator {
    /// Returns the dictionary probability of `tag` for the word `word_lower`.
    ///
    /// The lookup key is `"<word_lower>:<tag.string>"` and is searched in
    /// `morph.dict.p_t_given_w`. The stored integer is divided by 1,000,000
    /// (presumably the dictionary keeps probabilities as fixed-point values
    /// scaled by 1e6 -- TODO confirm against the dictionary format).
    /// A key that is not present yields `0.0`.
    pub fn prob(self, morph: &MorphAnalyzer, word_lower: &str, tag: &OpencorporaTagReg) -> f64 {
        let dawg_key = format!("{}:{}", word_lower, tag.string);
        f64::from(morph.dict.p_t_given_w.find(&dawg_key).unwrap_or(0)) / 1_000_000.0
    }

    /// Re-scores `parses` in place using the dictionary probabilities.
    ///
    /// * If at least one parse has a non-zero dictionary probability, every
    ///   parse's score is replaced with `Score::Real(probability)` and the
    ///   parses are sorted by descending score (stable sort, so ties keep
    ///   their relative order).
    /// * Otherwise the existing scores are normalised by dividing each one by
    ///   their sum; the order of `parses` is left as is. When the existing
    ///   scores sum to zero there is nothing to normalise and they are left
    ///   untouched (dividing would turn them into NaN).
    ///
    /// An empty `parses` is a no-op. `_word` is not used.
    pub fn apply_to_parses(
        self,
        morph: &MorphAnalyzer,
        _word: &str,
        word_lower: &str,
        parses: &mut Vec<Parsed>,
    ) {
        if parses.is_empty() {
            return;
        }

        let probs: Vec<f64> = parses
            .iter()
            .map(|p| self.prob(morph, word_lower, p.lex.get_tag(morph)))
            .collect();

        if probs.iter().sum::<f64>() == 0.0 {
            // No dictionary information for this word: fall back to the
            // scores the parses already carry, normalised by their sum.
            let total: f64 = parses.iter().map(|p| p.score.value()).sum();
            // A zero total would make the factor infinite and every
            // `0 * inf` score NaN, so skip normalisation in that case.
            if total != 0.0 {
                let k = 1.0 / total;
                for p in parses.iter_mut() {
                    p.score = p.score * k;
                }
            }
        } else {
            for (p, prob) in parses.iter_mut().zip(probs) {
                p.score = Score::Real(prob);
            }
            // Highest score first; incomparable values (NaN) are treated as equal.
            parses.sort_by(|p1, p2| {
                p2.score
                    .value()
                    .partial_cmp(&p1.score.value())
                    .unwrap_or(Ordering::Equal)
            });
        }
    }

    /// Sorts `tags` in place by descending dictionary probability for
    /// `word_lower`.
    ///
    /// The sort is stable, so tags with equal probability keep their relative
    /// order. An empty `tags` is a no-op. `_word` is not used.
    pub fn apply_to_tags(
        self,
        morph: &MorphAnalyzer,
        _word: &str,
        word_lower: &str,
        tags: &mut Vec<OpencorporaTagReg>,
    ) {
        if tags.is_empty() {
            return;
        }

        // Look every probability up exactly once: each `prob` call builds a
        // key string and queries the dictionary, which is too costly to
        // repeat inside the sort comparator.
        let mut scored: Vec<(f64, OpencorporaTagReg)> = tags
            .drain(..)
            .map(|tag| (self.prob(morph, word_lower, &tag), tag))
            .collect();

        // Highest probability first; incomparable values are treated as equal.
        scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal));

        tags.extend(scored.into_iter().map(|(_, tag)| tag));
    }
}