rustfst/algorithms/determinize/
determinize_static.rs

1use std::borrow::Borrow;
2
3use anyhow::Result;
4
5use crate::algorithms::determinize::divisors::CommonDivisor;
6use crate::algorithms::determinize::DeterminizeFsa;
7use crate::algorithms::determinize::{DefaultCommonDivisor, DeterminizeType, GallicCommonDivisor};
8use crate::algorithms::factor_weight::factor_iterators::{
9    GallicFactor, GallicFactorMin, GallicFactorRestrict,
10};
11use crate::algorithms::factor_weight::{factor_weight, FactorWeightOptions, FactorWeightType};
12use crate::algorithms::weight_convert;
13use crate::algorithms::weight_converters::{FromGallicConverter, ToGallicConverter};
14use crate::fst_impls::VectorFst;
15use crate::fst_properties::mutable_properties::determinize_properties;
16use crate::fst_properties::FstProperties;
17use crate::fst_traits::{AllocableFst, ExpandedFst, Fst, MutableFst};
18use crate::semirings::SemiringProperties;
19use crate::semirings::{
20    GallicWeight, GallicWeightMin, GallicWeightRestrict, WeaklyDivisibleSemiring, WeightQuantize,
21};
22use crate::{EPS_LABEL, KDELTA};
23
24pub fn determinize_with_distance<W, F1, F2>(
25    ifst: &F1,
26    in_dist: &[W],
27    delta: f32,
28) -> Result<(F2, Vec<W>)>
29where
30    W: WeaklyDivisibleSemiring + WeightQuantize,
31    F1: ExpandedFst<W>,
32    F2: MutableFst<W> + AllocableFst<W>,
33{
34    if !W::properties().contains(SemiringProperties::LEFT_SEMIRING) {
35        bail!("determinize_fsa : weight must be left distributive")
36    }
37    let fst = DeterminizeFsa::<_, F1, DefaultCommonDivisor, _, _>::new(ifst, Some(in_dist), delta)?;
38    fst.compute_with_distance()
39}
40
41pub fn determinize_fsa<W, F1, F2, CD>(fst_in: &F1, delta: f32) -> Result<F2>
42where
43    W: WeaklyDivisibleSemiring + WeightQuantize,
44    F1: Fst<W>,
45    F2: MutableFst<W> + AllocableFst<W>,
46    CD: CommonDivisor<W>,
47{
48    if !W::properties().contains(SemiringProperties::LEFT_SEMIRING) {
49        bail!("determinize_fsa : weight must be left distributive")
50    }
51    let det_fsa: DeterminizeFsa<W, F1, CD, _, Vec<W>> = DeterminizeFsa::new(fst_in, None, delta)?;
52    det_fsa.compute()
53}
54
55pub fn determinize_fst<W, F1, F2>(fst_in: &F1, det_type: DeterminizeType, delta: f32) -> Result<F2>
56where
57    W: WeaklyDivisibleSemiring + WeightQuantize + 'static,
58    F1: ExpandedFst<W>,
59    F2: MutableFst<W> + AllocableFst<W>,
60{
61    let mut to_gallic = ToGallicConverter {};
62    let mut from_gallic = FromGallicConverter {
63        superfinal_label: EPS_LABEL,
64    };
65
66    let factor_opts = FactorWeightOptions {
67        delta: KDELTA,
68        mode: FactorWeightType::FACTOR_FINAL_WEIGHTS,
69        final_ilabel: EPS_LABEL,
70        final_olabel: EPS_LABEL,
71        increment_final_ilabel: false,
72        increment_final_olabel: false,
73    };
74
75    match det_type {
76        DeterminizeType::DeterminizeDisambiguate => {
77            if !W::properties().contains(SemiringProperties::PATH) {
78                bail!("determinize : weight needs to have the path property to disambiguate output")
79            }
80            let fsa: VectorFst<GallicWeightMin<W>> =
81                weight_convert(fst_in.borrow(), &mut to_gallic)?;
82            let determinized_fsa: VectorFst<GallicWeightMin<W>> =
83                determinize_fsa::<_, VectorFst<_>, _, GallicCommonDivisor>(&fsa, delta)?;
84            let factored_determinized_fsa: VectorFst<GallicWeightMin<W>> =
85                factor_weight::<_, VectorFst<GallicWeightMin<W>>, _, _, GallicFactorMin<W>>(
86                    &determinized_fsa,
87                    factor_opts,
88                )?;
89            weight_convert(&factored_determinized_fsa, &mut from_gallic)
90        }
91        DeterminizeType::DeterminizeFunctional => {
92            let fsa: VectorFst<GallicWeightRestrict<W>> =
93                weight_convert(fst_in.borrow(), &mut to_gallic)?;
94            let determinized_fsa: VectorFst<GallicWeightRestrict<W>> =
95                determinize_fsa::<_, VectorFst<_>, _, GallicCommonDivisor>(&fsa, delta)?;
96            let factored_determinized_fsa: VectorFst<GallicWeightRestrict<W>> =
97                factor_weight::<
98                    _,
99                    VectorFst<GallicWeightRestrict<W>>,
100                    _,
101                    _,
102                    GallicFactorRestrict<W>,
103                >(&determinized_fsa, factor_opts)?;
104            weight_convert(&factored_determinized_fsa, &mut from_gallic)
105        }
106        DeterminizeType::DeterminizeNonFunctional => {
107            let fsa: VectorFst<GallicWeight<W>> = weight_convert(fst_in.borrow(), &mut to_gallic)?;
108            let determinized_fsa: VectorFst<GallicWeight<W>> =
109                determinize_fsa::<_, VectorFst<_>, _, GallicCommonDivisor>(&fsa, delta)?;
110            let factored_determinized_fsa: VectorFst<GallicWeight<W>> =
111                factor_weight::<_, VectorFst<GallicWeight<W>>, _, _, GallicFactor<W>>(
112                    &determinized_fsa,
113                    factor_opts,
114                )?;
115            weight_convert(&factored_determinized_fsa, &mut from_gallic)
116        }
117    }
118}
119
120#[derive(Clone, Debug, Copy, PartialOrd, PartialEq)]
121pub struct DeterminizeConfig {
122    pub delta: f32,
123    pub det_type: DeterminizeType,
124}
125
126impl DeterminizeConfig {
127    pub fn new(delta: f32, det_type: DeterminizeType) -> Self {
128        Self { delta, det_type }
129    }
130
131    pub fn with_delta(self, delta: f32) -> Self {
132        Self { delta, ..self }
133    }
134
135    pub fn with_det_type(self, det_type: DeterminizeType) -> Self {
136        Self { det_type, ..self }
137    }
138}
139
140impl Default for DeterminizeConfig {
141    fn default() -> Self {
142        Self {
143            delta: KDELTA,
144            det_type: DeterminizeType::DeterminizeFunctional,
145        }
146    }
147}
148
149pub fn determinize<W, F1, F2>(fst_in: &F1) -> Result<F2>
150where
151    W: WeaklyDivisibleSemiring + WeightQuantize,
152    F1: ExpandedFst<W>,
153    F2: MutableFst<W> + AllocableFst<W>,
154{
155    determinize_with_config(fst_in, DeterminizeConfig::default())
156}
157
158/// This operations creates an equivalent FST that has the property that no
159/// state has two transitions with the same input label. For this algorithm,
160/// epsilon transitions are treated as regular symbols.
161///
162/// # Example
163///
164/// ## Input
165///
166/// ![determinize_in](https://raw.githubusercontent.com/Garvys/rustfst-images-doc/master/images/determinize_in.svg?sanitize=true)
167///
168/// ## Determinize
169///
170/// ![determinize_out](https://raw.githubusercontent.com/Garvys/rustfst-images-doc/master/images/determinize_out.svg?sanitize=true)
171///
172pub fn determinize_with_config<W, F1, F2>(fst_in: &F1, config: DeterminizeConfig) -> Result<F2>
173where
174    W: WeaklyDivisibleSemiring + WeightQuantize,
175    F1: ExpandedFst<W>,
176    F2: MutableFst<W> + AllocableFst<W>,
177{
178    let delta = config.delta;
179    let det_type = config.det_type;
180    let iprops = fst_in.borrow().properties();
181    let mut fst_res: F2 = if iprops.contains(FstProperties::ACCEPTOR) {
182        determinize_fsa::<_, F1, _, DefaultCommonDivisor>(fst_in, delta)?
183    } else {
184        determinize_fst(fst_in, det_type, delta)?
185    };
186
187    let distinct_psubsequential_labels = !(det_type == DeterminizeType::DeterminizeNonFunctional);
188    fst_res.set_properties(determinize_properties(
189        iprops,
190        false,
191        distinct_psubsequential_labels,
192    ));
193    fst_res.set_symts_from_fst(fst_in.borrow());
194    Ok(fst_res)
195}
196
197#[cfg(test)]
198mod tests {
199    use crate::fst_impls::VectorFst;
200    use crate::semirings::TropicalWeight;
201    use crate::tr::Tr;
202    use crate::Semiring;
203    use crate::SymbolTable;
204    use proptest::prelude::any;
205    use proptest::proptest;
206    use std::sync::Arc;
207
208    use super::*;
209
210    #[test]
211    fn test_determinize() -> Result<()> {
212        let mut input_fst = VectorFst::<TropicalWeight>::new();
213        let s0 = input_fst.add_state();
214        let s1 = input_fst.add_state();
215
216        input_fst.set_start(s0)?;
217        input_fst.set_final(s1, TropicalWeight::one())?;
218
219        input_fst.add_tr(s0, Tr::new(1, 1, 2.0, s1))?;
220        input_fst.add_tr(s0, Tr::new(1, 1, 2.0, s1))?;
221        input_fst.add_tr(s0, Tr::new(1, 1, 2.0, s1))?;
222
223        let mut ref_fst = VectorFst::new();
224        let s0 = ref_fst.add_state();
225        let s1 = ref_fst.add_state();
226
227        ref_fst.set_start(s0)?;
228        ref_fst.set_final(s1, TropicalWeight::one())?;
229
230        ref_fst.add_tr(s0, Tr::new(1, 1, TropicalWeight::new(2.0), s1))?;
231
232        let determinized_fst: VectorFst<TropicalWeight> = determinize(&input_fst)?;
233
234        assert_eq!(determinized_fst, ref_fst);
235        Ok(())
236    }
237
238    #[test]
239    fn test_determinize_2() -> Result<()> {
240        let mut input_fst = VectorFst::<TropicalWeight>::new();
241        let s0 = input_fst.add_state();
242        let s1 = input_fst.add_state();
243        let s2 = input_fst.add_state();
244        let s3 = input_fst.add_state();
245
246        input_fst.set_start(s0)?;
247        input_fst.set_final(s3, TropicalWeight::one())?;
248
249        input_fst.add_tr(s0, Tr::new(1, 1, 2.0, s1))?;
250        input_fst.add_tr(s0, Tr::new(1, 1, 3.0, s2))?;
251
252        input_fst.add_tr(s1, Tr::new(2, 2, 4.0, s3))?;
253        input_fst.add_tr(s2, Tr::new(2, 2, 3.0, s3))?;
254
255        let mut ref_fst = VectorFst::new();
256        let s0 = ref_fst.add_state();
257        let s1 = ref_fst.add_state();
258        let s2 = ref_fst.add_state();
259
260        ref_fst.set_start(s0)?;
261        ref_fst.set_final(s2, TropicalWeight::one())?;
262
263        ref_fst.add_tr(s0, Tr::new(1, 1, TropicalWeight::new(2.0), s1))?;
264        ref_fst.add_tr(s1, Tr::new(2, 2, TropicalWeight::new(4.0), s2))?;
265
266        let determinized_fst: VectorFst<TropicalWeight> = determinize(&input_fst)?;
267
268        assert_eq!(determinized_fst, ref_fst);
269        Ok(())
270    }
271
272    proptest! {
273        #[test]
274        fn test_proptest_determinize_keeps_symts(mut fst in any::<VectorFst::<TropicalWeight>>()) {
275            let symt = Arc::new(SymbolTable::new());
276            fst.set_input_symbols(Arc::clone(&symt));
277            fst.set_output_symbols(Arc::clone(&symt));
278
279            let fst : VectorFst<_> = determinize_with_config(&fst, DeterminizeConfig::default().with_det_type(DeterminizeType::DeterminizeNonFunctional)).unwrap();
280
281            assert!(fst.input_symbols().is_some());
282            assert!(fst.output_symbols().is_some());
283        }
284    }
285}