1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
//! Rust implementation of Weighted Finite States Transducers.
//!
//! Rustfst is a library for constructing, combining, optimizing, and searching weighted
//! finite-state transducers (FSTs). Weighted finite-state transducers are automata where
//! each transition has an input label, an output label, and a weight.
//! The more familiar finite-state acceptor is represented as a transducer
//! with each transition's input and output label equal. Finite-state acceptors
//! are used to represent sets of strings (specifically, regular or rational sets);
//! finite-state transducers are used to represent binary relations between pairs of
//! strings (specifically, rational transductions). The weights can be used to represent
//! the cost of taking a particular transition.
//!
//! FSTs have key applications in speech recognition and synthesis, machine translation,
//! optical character recognition, pattern matching, string processing, machine learning,
//! information extraction and retrieval among others. Often a weighted transducer is used to
//! represent a probabilistic model (e.g., an n-gram model, pronunciation model). FSTs can be
//! optimized by determinization and minimization, models can be applied to hypothesis sets
//! (also represented as automata) or cascaded by finite-state composition, and the best
//! results can be selected by shortest-path algorithms.
//!
//! ![fst](https://raw.githubusercontent.com/Garvys/rustfst-images-doc/master/images/project_in.svg?sanitize=true)
//!
//! ## References
//!
//! Implementation heavily inspired from Mehryar Mohri's, Cyril Allauzen's and Michael Riley's work :
//! - [Weighted automata algorithms](https://cs.nyu.edu/~mohri/pub/hwa.pdf)
//! - [The design principles of a weighted finite-state transducer library](https://core.ac.uk/download/pdf/82101846.pdf)
//! - [OpenFst: A general and efficient weighted finite-state transducer library](https://link.springer.com/chapter/10.1007%2F978-3-540-76336-9_3)
//! - [Weighted finite-state transducers in speech recognition](https://repository.upenn.edu/cgi/viewcontent.cgi?article=1010&context=cis_papers)
//!
//! ## Example
//!
//! ```rust
//! use anyhow::Result;
//! use rustfst::prelude::*;
//! use rustfst::algorithms::determinize::{DeterminizeType, determinize};
//! use rustfst::algorithms::rm_epsilon::rm_epsilon;
//! use std::sync::Arc;
//!
//! fn main() -> Result<()> {
//!     // Creates a empty wFST
//!     let mut fst = VectorFst::<TropicalWeight>::new();
//!
//!     // Add some states
//!     let s0 = fst.add_state();
//!     let s1 = fst.add_state();
//!     let s2 = fst.add_state();
//!
//!     // Set s0 as the start state
//!     fst.set_start(s0)?;
//!
//!     // Add a transition from s0 to s1
//!     fst.add_tr(s0, Tr::new(3, 5, 10.0, s1))?;
//!
//!     // Add a transition from s0 to s2
//!     fst.add_tr(s0, Tr::new(5, 7, 18.0, s2))?;
//!
//!     // Set s1 and s2 as final states
//!     fst.set_final(s1, 31.0)?;
//!     fst.set_final(s2, 45.0)?;
//!
//!     // Iter over all the paths in the wFST
//!     for p in fst.paths_iter() {
//!          println!("{:?}", p);
//!     }
//!
//!     // A lot of operations are available to modify/optimize the FST.
//!     // Here are a few examples :
//!
//!     // - Remove useless states.
//!     connect(&mut fst)?;
//!
//!     // - Optimize the FST by merging states with the same behaviour.
//!     minimize(&mut fst, true)?;
//!
//!     // - Copy all the input labels in the output.
//!     project(&mut fst, ProjectType::ProjectInput);
//!
//!     // - Remove epsilon transitions.
//!     rm_epsilon(&mut fst)?;
//!
//!     // - Compute an equivalent FST but deterministic.
//!     fst = determinize(Arc::new(fst), DeterminizeType::DeterminizeFunctional)?;
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Status
//!
//! A big number of algorithms are already implemented. The main one missing is the Composition.

#[warn(missing_docs)]
#[cfg(test)]
extern crate counter;
#[macro_use]
extern crate anyhow;
#[cfg(test)]
extern crate rand;
#[cfg(test)]
extern crate serde;
#[cfg(test)]
extern crate serde_json;

pub use crate::drawing_config::DrawingConfig;
pub use crate::fst_path::{check_path_in_fst, FstPath};
pub use crate::symbol_table::SymbolTable;

pub use self::tr::Tr;
pub use self::trs::{Trs, TrsConst, TrsVec};

pub use crate::semirings::Semiring;
#[cfg(test)]
use doc_comment::doc_comment;

// When running `cargo test`, rustdoc will check this file as well.
#[cfg(test)]
doc_comment!(include_str!("../../README.md"));

#[cfg(test)]
mod tests_openfst;

mod symbol_table;

/// Type used for the input label and output label of a transition in a wFST -> usize
pub type Label = usize;
/// Symbol to map in the Symbol Table -> String
pub type Symbol = String;

/// Type used to identify a state in a wFST -> usize
pub type StateId = usize;

/// Epsilon label representing the epsilon transition (empty transition) = `0`.
pub const EPS_LABEL: Label = 0;
/// Epsilon symbol representing the epsilon transition (empty transition) = `<eps>`.
pub const EPS_SYMBOL: &str = "<eps>";

/// A few utilities to manipulate wFSTs.
pub mod utils;

/// Provides algorithms that are generic to all wFST.
pub mod algorithms;

/// Provides the `FstProperties` struct and some utils functions around it.
/// Useful to assert some properties on a Fst.
pub mod fst_properties;
/// Implementation of the transitions inside a wFST.
mod tr;

#[macro_use]
/// Provides traits that must be implemented to be able to use generic algorithms.
pub mod fst_traits;
/// Implementation of the wFST traits with different data structures.
pub mod fst_impls;
/// Provides a trait that shall be implemented for all weights stored inside a wFST.
pub mod semirings;

mod drawing_config;
/// Implementation of a successful path inside a wFST.
mod fst_path;
mod parsers;

/// A representable float near .001. (Used in Quantize)
pub const KDELTA: f32 = 1.0f32 / 1024.0f32;

/// Module re-exporting most of the objects from this crate.
pub mod prelude {
    pub use crate::algorithms::tr_compares::*;
    pub use crate::algorithms::*;
    pub use crate::fst_impls::*;
    pub use crate::fst_traits::*;
    pub use crate::semirings::*;
    pub use crate::tr::Tr;
}

mod proptest_fst;

pub(crate) static NO_LABEL: Label = std::usize::MAX;
pub(crate) static NO_STATE_ID: StateId = std::usize::MAX;
pub(crate) static UNASSIGNED: usize = std::usize::MAX;

pub mod trs;
pub mod trs_iter_mut;