lling-llang 0.1.0

WFST framework for text normalization and grammar correction
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
//! Epsilon filter for WFST composition.
//!
//! During composition, epsilon transitions must be handled carefully to avoid
//! incorrect or duplicate path enumeration. This module implements the epsilon
//! filter from Mohri (2009).
//!
//! # Filter States
//!
//! The filter maintains a state tracking which FST is currently processing
//! an epsilon transition:
//!
//! - `None`: No epsilon in progress, both FSTs can advance
//! - `Eps1`: FST1 output epsilon in progress, only FST2 can advance
//! - `Eps2`: FST2 input epsilon in progress, only FST1 can advance
//!
//! # Example
//!
//! ```rust
//! use lling_llang::composition::{EpsilonFilter, EpsilonFilterType, FilterState};
//!
//! let filter = EpsilonFilter::new(EpsilonFilterType::Sequencing);
//! let state = FilterState::None;
//!
//! // Check what transitions are allowed
//! let (can_eps1, can_eps2, can_match) = filter.allowed_moves(state);
//! ```

/// Epsilon filter type (from Mohri 2009).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum EpsilonFilterType {
    /// No filter - for epsilon-free FSTs only.
    None,
    /// Sequencing filter - default for general FSTs.
    /// Ensures epsilons are processed in a specific order.
    #[default]
    Sequencing,
    /// Matching filter - for specific applications where
    /// epsilons must match between FSTs.
    Matching,
}

/// Filter state during composition.
///
/// Tracks which FST is currently in the middle of an epsilon transition.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum FilterState {
    /// No epsilon in progress - both FSTs can advance.
    #[default]
    None,
    /// FST1 output epsilon in progress.
    /// Only FST2 can advance or FST1 can output more epsilons.
    Eps1,
    /// FST2 input epsilon in progress.
    /// Only FST1 can advance or FST2 can consume more epsilons.
    Eps2,
}

/// Epsilon filter for WFST composition.
///
/// Manages epsilon transitions during composition to ensure correct
/// and non-redundant path enumeration.
#[derive(Clone, Debug)]
pub struct EpsilonFilter {
    filter_type: EpsilonFilterType,
}

impl Default for EpsilonFilter {
    fn default() -> Self {
        Self {
            filter_type: EpsilonFilterType::Sequencing,
        }
    }
}

impl EpsilonFilter {
    /// Create a new epsilon filter with the given type.
    pub fn new(filter_type: EpsilonFilterType) -> Self {
        Self { filter_type }
    }

    /// Get the filter type.
    pub fn filter_type(&self) -> EpsilonFilterType {
        self.filter_type
    }

    /// Determine allowed moves from the current filter state.
    ///
    /// Returns `(can_eps1_output, can_eps2_input, can_match)`:
    /// - `can_eps1_output`: FST1 can output epsilon (advance FST1 only)
    /// - `can_eps2_input`: FST2 can consume epsilon (advance FST2 only)
    /// - `can_match`: Both FSTs can advance on matching label
    pub fn allowed_moves(&self, state: FilterState) -> (bool, bool, bool) {
        match self.filter_type {
            EpsilonFilterType::None => {
                // No filter - everything allowed
                (true, true, true)
            }
            EpsilonFilterType::Sequencing => {
                match state {
                    FilterState::None => (true, true, true),
                    FilterState::Eps1 => (true, false, true), // FST1 eps or match
                    FilterState::Eps2 => (false, true, true), // FST2 eps or match
                }
            }
            EpsilonFilterType::Matching => {
                match state {
                    FilterState::None => (true, true, true),
                    FilterState::Eps1 => (true, true, false), // Epsilons only
                    FilterState::Eps2 => (true, true, false), // Epsilons only
                }
            }
        }
    }

    /// Compute the next filter state after a transition.
    ///
    /// # Arguments
    ///
    /// * `_current` - Current filter state (unused but needed for interface consistency)
    /// * `eps1_output` - FST1 produced output epsilon
    /// * `eps2_input` - FST2 consumed input epsilon
    pub fn next_state(
        &self,
        _current: FilterState,
        eps1_output: bool,
        eps2_input: bool,
    ) -> FilterState {
        match self.filter_type {
            EpsilonFilterType::None => FilterState::None,
            EpsilonFilterType::Sequencing => {
                if eps1_output && !eps2_input {
                    FilterState::Eps1
                } else if eps2_input && !eps1_output {
                    FilterState::Eps2
                } else {
                    // Both epsilon (eps-eps) or both non-epsilon (match)
                    FilterState::None
                }
            }
            EpsilonFilterType::Matching => {
                // Matching filter returns to None only on eps-eps or match
                if eps1_output == eps2_input {
                    FilterState::None
                } else if eps1_output {
                    FilterState::Eps1
                } else {
                    FilterState::Eps2
                }
            }
        }
    }

    /// Check if a transition is allowed given the filter state.
    ///
    /// # Arguments
    ///
    /// * `state` - Current filter state
    /// * `eps1_output` - FST1 would produce output epsilon
    /// * `eps2_input` - FST2 would consume input epsilon
    /// * `is_match` - Labels match (non-epsilon transition)
    pub fn is_transition_allowed(
        &self,
        state: FilterState,
        eps1_output: bool,
        eps2_input: bool,
        is_match: bool,
    ) -> bool {
        let (can_eps1, can_eps2, can_match) = self.allowed_moves(state);

        if is_match {
            can_match
        } else if eps1_output && !eps2_input {
            can_eps1
        } else if eps2_input && !eps1_output {
            can_eps2
        } else if eps1_output && eps2_input {
            // Both epsilon - allowed if either is allowed
            can_eps1 || can_eps2
        } else {
            false
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_filter_type_default() {
        let filter = EpsilonFilter::default();
        assert_eq!(filter.filter_type(), EpsilonFilterType::Sequencing);
    }

    #[test]
    fn test_filter_state_default() {
        let state = FilterState::default();
        assert_eq!(state, FilterState::None);
    }

    #[test]
    fn test_no_filter_allows_all() {
        let filter = EpsilonFilter::new(EpsilonFilterType::None);

        for state in [FilterState::None, FilterState::Eps1, FilterState::Eps2] {
            let (eps1, eps2, match_) = filter.allowed_moves(state);
            assert!(eps1);
            assert!(eps2);
            assert!(match_);
        }
    }

    #[test]
    fn test_sequencing_filter_none_state() {
        let filter = EpsilonFilter::new(EpsilonFilterType::Sequencing);

        let (eps1, eps2, match_) = filter.allowed_moves(FilterState::None);
        assert!(eps1);
        assert!(eps2);
        assert!(match_);
    }

    #[test]
    fn test_sequencing_filter_eps1_state() {
        let filter = EpsilonFilter::new(EpsilonFilterType::Sequencing);

        let (eps1, eps2, match_) = filter.allowed_moves(FilterState::Eps1);
        assert!(eps1); // FST1 can continue with epsilons
        assert!(!eps2); // FST2 cannot start epsilon sequence
        assert!(match_); // Matching still allowed
    }

    #[test]
    fn test_sequencing_filter_eps2_state() {
        let filter = EpsilonFilter::new(EpsilonFilterType::Sequencing);

        let (eps1, eps2, match_) = filter.allowed_moves(FilterState::Eps2);
        assert!(!eps1); // FST1 cannot start epsilon sequence
        assert!(eps2); // FST2 can continue with epsilons
        assert!(match_); // Matching still allowed
    }

    #[test]
    fn test_next_state_sequencing() {
        let filter = EpsilonFilter::new(EpsilonFilterType::Sequencing);

        // eps1 output -> Eps1 state
        assert_eq!(
            filter.next_state(FilterState::None, true, false),
            FilterState::Eps1
        );

        // eps2 input -> Eps2 state
        assert_eq!(
            filter.next_state(FilterState::None, false, true),
            FilterState::Eps2
        );

        // Match (both non-eps) -> None
        assert_eq!(
            filter.next_state(FilterState::Eps1, false, false),
            FilterState::None
        );

        // Both epsilon -> None
        assert_eq!(
            filter.next_state(FilterState::None, true, true),
            FilterState::None
        );
    }

    #[test]
    fn test_is_transition_allowed_match() {
        let filter = EpsilonFilter::new(EpsilonFilterType::Sequencing);

        // Match allowed in all states
        assert!(filter.is_transition_allowed(FilterState::None, false, false, true));
        assert!(filter.is_transition_allowed(FilterState::Eps1, false, false, true));
        assert!(filter.is_transition_allowed(FilterState::Eps2, false, false, true));
    }

    #[test]
    fn test_is_transition_allowed_eps1() {
        let filter = EpsilonFilter::new(EpsilonFilterType::Sequencing);

        // eps1 output
        assert!(filter.is_transition_allowed(FilterState::None, true, false, false));
        assert!(filter.is_transition_allowed(FilterState::Eps1, true, false, false));
        assert!(!filter.is_transition_allowed(FilterState::Eps2, true, false, false));
    }

    #[test]
    fn test_is_transition_allowed_eps2() {
        let filter = EpsilonFilter::new(EpsilonFilterType::Sequencing);

        // eps2 input
        assert!(filter.is_transition_allowed(FilterState::None, false, true, false));
        assert!(!filter.is_transition_allowed(FilterState::Eps1, false, true, false));
        assert!(filter.is_transition_allowed(FilterState::Eps2, false, true, false));
    }

    #[test]
    fn test_matching_filter() {
        let filter = EpsilonFilter::new(EpsilonFilterType::Matching);

        // In Eps1 or Eps2, matching is not allowed (only epsilons)
        let (_, _, match_) = filter.allowed_moves(FilterState::Eps1);
        assert!(!match_);

        let (_, _, match_) = filter.allowed_moves(FilterState::Eps2);
        assert!(!match_);
    }
}

// =============================================================================
// Property-Based Tests
// =============================================================================

#[cfg(test)]
mod property_tests {
    use super::*;
    use proptest::prelude::*;

    /// Strategy for generating arbitrary filter types.
    fn arb_filter_type() -> impl Strategy<Value = EpsilonFilterType> {
        prop_oneof![
            Just(EpsilonFilterType::None),
            Just(EpsilonFilterType::Sequencing),
            Just(EpsilonFilterType::Matching),
        ]
    }

    /// Strategy for generating arbitrary filter states.
    fn arb_filter_state() -> impl Strategy<Value = FilterState> {
        prop_oneof![
            Just(FilterState::None),
            Just(FilterState::Eps1),
            Just(FilterState::Eps2),
        ]
    }

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(100))]

        /// next_state is deterministic - same inputs always produce same output.
        #[test]
        fn next_state_deterministic(
            filter_type in arb_filter_type(),
            state in arb_filter_state(),
            eps1 in any::<bool>(),
            eps2 in any::<bool>()
        ) {
            let filter = EpsilonFilter::new(filter_type);
            let result1 = filter.next_state(state, eps1, eps2);
            let result2 = filter.next_state(state, eps1, eps2);
            prop_assert_eq!(result1, result2);
        }

        /// No filter type always returns FilterState::None.
        #[test]
        fn no_filter_always_none(
            state in arb_filter_state(),
            eps1 in any::<bool>(),
            eps2 in any::<bool>()
        ) {
            let filter = EpsilonFilter::new(EpsilonFilterType::None);
            let result = filter.next_state(state, eps1, eps2);
            prop_assert_eq!(result, FilterState::None);
        }

        /// No filter allows all moves in all states.
        #[test]
        fn no_filter_allows_all(state in arb_filter_state()) {
            let filter = EpsilonFilter::new(EpsilonFilterType::None);
            let (eps1, eps2, match_) = filter.allowed_moves(state);
            prop_assert!(eps1);
            prop_assert!(eps2);
            prop_assert!(match_);
        }

        /// Sequencing filter allows matching in all states.
        #[test]
        fn sequencing_allows_match(state in arb_filter_state()) {
            let filter = EpsilonFilter::new(EpsilonFilterType::Sequencing);
            let (_, _, match_) = filter.allowed_moves(state);
            prop_assert!(match_);
        }

        /// Matching filter disallows matching in Eps1 and Eps2.
        #[test]
        fn matching_disallows_match_in_eps_states(state in arb_filter_state()) {
            let filter = EpsilonFilter::new(EpsilonFilterType::Matching);
            let (_, _, match_) = filter.allowed_moves(state);
            match state {
                FilterState::None => prop_assert!(match_),
                FilterState::Eps1 | FilterState::Eps2 => prop_assert!(!match_),
            }
        }

        /// From None state, both filters start fresh.
        #[test]
        fn none_state_symmetric(filter_type in arb_filter_type()) {
            let filter = EpsilonFilter::new(filter_type);
            let (eps1, eps2, match_) = filter.allowed_moves(FilterState::None);
            // From None, both eps1 and eps2 should be allowed
            prop_assert!(eps1);
            prop_assert!(eps2);
            prop_assert!(match_);
        }

        /// Sequencing filter state transitions are symmetric for eps1/eps2.
        #[test]
        fn sequencing_symmetric(eps1_only in any::<bool>()) {
            let filter = EpsilonFilter::new(EpsilonFilterType::Sequencing);

            // Test that eps1 alone leads to Eps1 state, eps2 alone to Eps2
            if eps1_only {
                let next = filter.next_state(FilterState::None, true, false);
                prop_assert_eq!(next, FilterState::Eps1);
            } else {
                let next = filter.next_state(FilterState::None, false, true);
                prop_assert_eq!(next, FilterState::Eps2);
            }
        }

        /// Match transition resets to None for sequencing filter.
        #[test]
        fn sequencing_match_resets(state in arb_filter_state()) {
            let filter = EpsilonFilter::new(EpsilonFilterType::Sequencing);
            // A match (both non-eps) should reset to None
            let next = filter.next_state(state, false, false);
            prop_assert_eq!(next, FilterState::None);
        }

        /// is_transition_allowed is consistent with allowed_moves.
        #[test]
        fn is_transition_allowed_consistent(
            filter_type in arb_filter_type(),
            state in arb_filter_state()
        ) {
            let filter = EpsilonFilter::new(filter_type);
            let (can_eps1, can_eps2, can_match) = filter.allowed_moves(state);

            // Check match case
            prop_assert_eq!(
                filter.is_transition_allowed(state, false, false, true),
                can_match
            );

            // Check eps1 only case
            prop_assert_eq!(
                filter.is_transition_allowed(state, true, false, false),
                can_eps1
            );

            // Check eps2 only case
            prop_assert_eq!(
                filter.is_transition_allowed(state, false, true, false),
                can_eps2
            );
        }

        /// Both eps transitions need at least one allowed.
        #[test]
        fn both_eps_needs_at_least_one(
            filter_type in arb_filter_type(),
            state in arb_filter_state()
        ) {
            let filter = EpsilonFilter::new(filter_type);
            let (can_eps1, can_eps2, _) = filter.allowed_moves(state);

            // Both eps is allowed if either is allowed
            let both_eps_allowed = filter.is_transition_allowed(state, true, true, false);
            prop_assert_eq!(both_eps_allowed, can_eps1 || can_eps2);
        }
    }
}