// Single-tag probability estimator: re-scores parses and orders tags by P(t|w).
use std::cmp::Ordering;

use opencorpora::OpencorporaTagReg;
use prelude::*;

/// Field-less (stateless) estimator whose methods score tags for a word using
/// the `p_t_given_w` dictionary of the `MorphAnalyzer` passed to each call.
///
/// The type is `Copy`, so its methods take `self` by value.
#[derive(Debug, Clone, Copy)]
pub struct SingleTagProbabilityEstimator {}

impl SingleTagProbabilityEstimator {
    pub fn prob(self, morph: &MorphAnalyzer, word_lower: &str, tag: &OpencorporaTagReg) -> f64 {
        let dawg_key = format!("{}:{}", word_lower, tag.string);
        f64::from(morph.dict.p_t_given_w.find(&dawg_key).unwrap_or(0)) / 1_000_000.0
    }

    pub fn apply_to_parses(
        self,
        morph: &MorphAnalyzer,
        _word: &str,
        word_lower: &str,
        parses: &mut Vec<Parsed>,
    ) {
        if parses.is_empty() {
            return;
        }

        let probs: Vec<f64> = parses
            .iter()
            .map(|p: &Parsed| self.prob(morph, word_lower, p.lex.get_tag(morph)))
            .collect();

        if probs.iter().cloned().sum::<f64>() == 0.0 {
            // no P(t|w) information is available; return normalized estimate
            let k = 1.0 / parses.iter().map(|p: &Parsed| p.score.value()).sum::<f64>();
            for p in parses {
                p.score = p.score * k;
            }
        } else {
            for (ref mut p, prob) in parses.iter_mut().zip(probs.into_iter()) {
                p.score = Score::Real(prob);
            }
            parses.sort_by(|p1: &Parsed, p2: &Parsed| {
                p2.score
                    .value()
                    .partial_cmp(&p1.score.value())
                    .unwrap_or(Ordering::Equal)
            });
        }
    }

    pub fn apply_to_tags(
        self,
        morph: &MorphAnalyzer,
        _word: &str,
        word_lower: &str,
        tags: &mut Vec<OpencorporaTagReg>,
    ) {
        if tags.is_empty() {
            return;
        }

        tags.sort_by(|t1: &OpencorporaTagReg, t2: &OpencorporaTagReg| {
            self.prob(morph, word_lower, t2)
                .partial_cmp(&self.prob(morph, word_lower, t1))
                .unwrap_or(Ordering::Equal)
        });
    }
}