Struct CompactFst 

pub struct CompactFst<W: Semiring, C: Compactor<W>> { /* private fields */ }

Memory-optimized FST implementation with pluggable compression strategies

CompactFst is a specialized FST implementation designed for scenarios where memory efficiency is the primary concern, even at the cost of some computational overhead. It uses customizable compression strategies to reduce the memory footprint of large FSTs, making it suitable for deployment on resource-constrained devices or when working with exceptionally large automata.

§Design Characteristics

  • Compression-First: Prioritizes minimal memory usage over access speed
  • Pluggable Compaction: Customizable compression strategies via the Compactor trait
  • Trade-off Oriented: Exchanges computational overhead for reduced memory footprint
  • Specialization-Ready: Supports domain-specific optimizations through custom compactors
  • Immutable Structure: Read-only access pattern for predictable memory usage

§Performance Profile

| Operation | Time Complexity | Memory Overhead | Notes |
|---|---|---|---|
| Arc Access | O(1) + decompression | Minimal | Requires decompression per access |
| State Access | O(1) | Fixed per state | Direct indexing into state array |
| Memory Usage | ~40-70% of VectorFst | Depends on compactor | Significant savings |
| Construction | O(V + E) | Temporary spike | One-time compression cost |
| Cache Performance | Variable | Excellent | Compressed data fits in cache |

§Memory Layout and Compression

CompactFst Memory Structure:
┌─────────────────────────────┐
│ States Array                │ ← Vec<CompactState>: metadata per state
│ [State 0: arcs_start, ...]  │   - final_weight_idx: Option<u32>
│ [State 1: arcs_start, ...]  │   - arcs_start: u32 (data array offset)
│ [State N: arcs_start, ...]  │   - num_arcs: u32 (arc count)
└─────────────────────────────┘
┌─────────────────────────────┐
│ Compressed Data Array       │ ← Vec<C::Element>: compressed arcs & weights
│ [Compressed Arc 0]          │   Compactor-specific format
│ [Compressed Arc 1]          │   May pack multiple fields together
│ [Compressed Weight 0]       │   Custom compression schemes
│ [...]                       │
└─────────────────────────────┘

§Compression Strategies

§Default Compression

The DefaultCompactor provides a baseline compression approach:

  • Stores arcs and weights in enumerated format
  • Maintains full precision of original data
  • Suitable for general-purpose usage

§Custom Compression Examples

use arcweight::prelude::*;
use arcweight::fst::{CompactFst, Compactor};

// Example: Custom compactor for small alphabets
#[derive(Debug)]
struct SmallAlphabetCompactor;

impl Compactor<TropicalWeight> for SmallAlphabetCompactor {
    type Element = u64; // Pack arc data into single u64
     
    fn compact(&self, arc: &Arc<TropicalWeight>) -> u64 {
        // Pack: 16 bits ilabel + 16 bits olabel + 16 bits nextstate + 16 bits weight
        let weight_bits = *arc.weight.value() as u64; // Simplified: truncates the f32; a real compactor would quantize
        (arc.ilabel as u64) << 48 |
        (arc.olabel as u64) << 32 |
        (arc.nextstate as u64) << 16 |
        weight_bits
    }
     
    fn expand(&self, element: &u64) -> Arc<TropicalWeight> {
        let ilabel = (element >> 48) as u32;
        let olabel = ((element >> 32) & 0xFFFF) as u32;
        let nextstate = ((element >> 16) & 0xFFFF) as u32;
        let weight_val = (element & 0xFFFF) as f32;
        Arc::new(ilabel, olabel, TropicalWeight::new(weight_val), nextstate)
    }
     
    fn compact_weight(&self, weight: &TropicalWeight) -> u64 {
        *weight.value() as u64
    }
     
    fn expand_weight(&self, element: &u64) -> TropicalWeight {
        TropicalWeight::new(*element as f32)
    }
}
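The 16-bit packing layout above can be sanity-checked in isolation. A minimal sketch of the same bit layout using plain integers (no `arcweight` types; the `pack`/`unpack` names are illustrative, not part of the crate):

```rust
// Pack four 16-bit fields into one u64: ilabel | olabel | nextstate | weight
fn pack(ilabel: u16, olabel: u16, nextstate: u16, weight: u16) -> u64 {
    (ilabel as u64) << 48 | (olabel as u64) << 32 | (nextstate as u64) << 16 | weight as u64
}

// Recover the four fields; `as u16` truncates to the low 16 bits after each shift
fn unpack(packed: u64) -> (u16, u16, u16, u16) {
    (
        (packed >> 48) as u16,
        (packed >> 32) as u16,
        (packed >> 16) as u16,
        packed as u16,
    )
}

fn main() {
    let packed = pack(7, 42, 3, 15);
    // Lossless round trip as long as every field fits in 16 bits
    assert_eq!(unpack(packed), (7, 42, 3, 15));
}
```

The round trip is exact only within 16-bit ranges; labels, states, or weights beyond 65535 would be silently truncated, which is the trade-off this compactor accepts for small alphabets.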

§Use Cases

§Mobile/Embedded Deployment

use arcweight::prelude::*;
use arcweight::fst::{CompactFst, DefaultCompactor};

// Deploy large language model on mobile device
fn create_mobile_language_model() -> CompactFst<TropicalWeight, DefaultCompactor<TropicalWeight>> {
    let base_fst = CompactFst::<TropicalWeight, DefaultCompactor<TropicalWeight>>::new();
     
    // Compressed representation reduces memory requirements
    // Suitable for devices with limited RAM
    base_fst
}

// Memory-conscious processing
fn process_on_mobile_device(
    fst: &CompactFst<TropicalWeight, DefaultCompactor<TropicalWeight>>,
    input: &[u32]
) {
    if let Some(start) = fst.start() {
        let mut current = start;
        for &label in input {
            // Each arc access involves decompression
            // But overall memory usage is minimal
            for arc in fst.arcs(current) {
                if arc.ilabel == label {
                    current = arc.nextstate;
                    break;
                }
            }
        }
    }
}

§Large-Scale Dictionary Compression

use arcweight::prelude::*;
use arcweight::fst::{CompactFst, DefaultCompactor};

// Compress massive pronunciation dictionary
fn compress_pronunciation_dict(
    // Input would be a large VectorFst with millions of entries
) -> CompactFst<LogWeight, DefaultCompactor<LogWeight>> {
    // The compaction process would convert from VectorFst
    // Achieving 40-60% memory reduction for large dictionaries
    let compact_dict = CompactFst::new();
     
    // Compressed dict can fit in memory where uncompressed cannot
    compact_dict
}

// Lookup in compressed dictionary
fn lookup_pronunciation(
    dict: &CompactFst<LogWeight, DefaultCompactor<LogWeight>>,
    word: &str
) -> Vec<String> {
    let mut pronunciations = Vec::new();
     
    if let Some(start) = dict.start() {
        // Traverse compressed FST
        // Decompression happens transparently during access
        let mut current = start;
        for ch in word.chars() {
            for arc in dict.arcs(current) {
                if arc.ilabel == ch as u32 {
                    current = arc.nextstate;
                    break;
                }
            }
        }
         
        // Extract pronunciations from final states
        // (Implementation details omitted for brevity)
    }
     
    pronunciations
}

§Cloud Storage Optimization

use arcweight::prelude::*;
use arcweight::fst::{CompactFst, DefaultCompactor};

// Optimize FSTs for cloud storage and transmission
fn optimize_for_cloud_storage() -> CompactFst<ProbabilityWeight, DefaultCompactor<ProbabilityWeight>> {
    let compact_fst = CompactFst::new();
     
    // Benefits:
    // - Reduced storage costs (smaller files)
    // - Faster network transmission
    // - Lower bandwidth usage
    // - Reduced I/O operations
     
    compact_fst
}

// Efficient batch processing of compressed FSTs
fn batch_process_compressed_fsts(
    fsts: &[CompactFst<ProbabilityWeight, DefaultCompactor<ProbabilityWeight>>]
) {
    for fst in fsts {
        // Process multiple compressed FSTs in memory simultaneously
        // Memory efficiency allows larger batch sizes
        process_single_fst(fst);
    }
}

fn process_single_fst(
    fst: &CompactFst<ProbabilityWeight, DefaultCompactor<ProbabilityWeight>>
) {
    // FST processing logic
    // Compression overhead amortized across batch processing
}

§Memory-Constrained Analysis

use arcweight::prelude::*;
use arcweight::fst::{CompactFst, DefaultCompactor};

// Analyze very large FSTs within memory constraints
fn analyze_large_fst_efficiently(
    fst: &CompactFst<BooleanWeight, DefaultCompactor<BooleanWeight>>
) -> AnalysisResult {
    let mut result = AnalysisResult::new();
     
    // Memory-efficient traversal
    for state in fst.states() {
        // Analyze state properties
        result.state_count += 1;
         
        // Count arcs with minimal memory overhead
        for arc in fst.arcs(state) {
            result.arc_count += 1;
             
            // Decompression cost amortized over analysis
            if arc.ilabel == 0 {
                result.epsilon_count += 1;
            }
        }
    }
     
    result
}

#[derive(Default)]
struct AnalysisResult {
    state_count: usize,
    arc_count: usize,
    epsilon_count: usize,
}

impl AnalysisResult {
    fn new() -> Self { Self::default() }
}

§Compactor Implementation Patterns

§Domain-Specific Compression

use arcweight::prelude::*;
use arcweight::fst::{Compactor, CompactFst};

// Example: Pronunciation-specific compactor
#[derive(Debug)]
struct PhonemeCompactor;

impl Compactor<TropicalWeight> for PhonemeCompactor {
    type Element = CompactPhoneme;
     
    fn compact(&self, arc: &Arc<TropicalWeight>) -> CompactPhoneme {
        // Custom compression for phoneme data
        // Could map common phoneme combinations to single values
        CompactPhoneme {
            phoneme_code: map_to_phoneme_code(arc.ilabel, arc.olabel),
            weight_class: quantize_weight(&arc.weight),
            next_state: arc.nextstate,
        }
    }
     
    fn expand(&self, element: &CompactPhoneme) -> Arc<TropicalWeight> {
        let (ilabel, olabel) = expand_phoneme_code(element.phoneme_code);
        let weight = dequantize_weight(element.weight_class);
        Arc::new(ilabel, olabel, weight, element.next_state)
    }
     
    fn compact_weight(&self, weight: &TropicalWeight) -> CompactPhoneme {
        // Weight-only compression
        CompactPhoneme {
            phoneme_code: 0,
            weight_class: quantize_weight(weight),
            next_state: 0,
        }
    }
     
    fn expand_weight(&self, element: &CompactPhoneme) -> TropicalWeight {
        dequantize_weight(element.weight_class)
    }
}

#[derive(Clone, Debug)]
struct CompactPhoneme {
    phoneme_code: u16,  // Compressed phoneme pair
    weight_class: u8,   // Quantized weight
    next_state: u32,
}

fn map_to_phoneme_code(ilabel: u32, olabel: u32) -> u16 {
    // Domain-specific compression logic
    ((ilabel & 0xFF) << 8 | (olabel & 0xFF)) as u16
}

fn expand_phoneme_code(code: u16) -> (u32, u32) {
    ((code >> 8) as u32, (code & 0xFF) as u32)
}

fn quantize_weight(weight: &TropicalWeight) -> u8 {
    // Quantize weight to 256 levels
    (weight.value().clamp(0.0, 25.5) * 10.0) as u8
}

fn dequantize_weight(quantized: u8) -> TropicalWeight {
    TropicalWeight::new(quantized as f32 / 10.0)
}
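The quantization above is lossy. A crate-free sketch of the same scheme, with plain `f32` standing in for `TropicalWeight`, makes the error bound explicit:

```rust
// Quantize a weight in [0.0, 25.5] to 256 levels, each 0.1 wide
fn quantize(w: f32) -> u8 {
    (w.clamp(0.0, 25.5) * 10.0) as u8
}

fn dequantize(q: u8) -> f32 {
    q as f32 / 10.0
}

fn main() {
    let w = 1.25_f32;
    let restored = dequantize(quantize(w));
    // `as u8` truncates toward zero, so the error is always below one grid step (0.1)
    assert!((w - restored).abs() < 0.1);
}
```

Truncation (rather than rounding) biases restored weights downward by up to one grid step; whether that matters depends on how the semiring weights are consumed downstream.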

§Performance Optimization Guidelines

§When to Use CompactFst

  • ✅ Memory is severely constrained (embedded systems, mobile devices)
  • ✅ Very large FSTs that don’t fit in memory uncompressed
  • ✅ Network transmission or storage optimization is critical
  • ✅ Batch processing where memory efficiency enables larger batches
  • ✅ Long-running applications where compression amortizes over time

§When NOT to Use CompactFst

  • ❌ Real-time applications requiring minimal latency
  • ❌ Frequent random access patterns
  • ❌ Small FSTs where compression overhead exceeds benefits
  • ❌ Applications that modify FSTs frequently
  • ❌ CPU-constrained environments where decompression is expensive

§Memory vs. Performance Trade-offs

  1. Compression Ratio: Higher compression = more CPU overhead
  2. Access Patterns: Sequential access amortizes decompression cost
  3. Cache Behavior: Compressed data may improve cache hit rates
  4. Batch Processing: Compression overhead amortized across operations

§Limitations and Considerations

§Current Implementation Limitations

  • final_weight() method requires redesign to avoid reference issues
  • Limited set of built-in compaction strategies
  • No automatic compression strategy selection
  • Compression is lossy with some compactors (quantization)

§Design Considerations

  • Compactor Choice: Critical for achieving desired compression ratio
  • Data Characteristics: Compression effectiveness varies by FST structure
  • Access Patterns: Random access amplifies decompression overhead
  • Precision Requirements: Some compactors may reduce precision

§Future Enhancements

  • Adaptive Compression: Automatic selection of optimal compaction strategy
  • Streaming Support: Support for FSTs larger than available memory
  • Lossy Compression: Options for approximate FSTs with higher compression
  • Incremental Updates: Support for modifying compressed FSTs efficiently

§Available Compression Strategies

  • DefaultCompactor: Enum-based storage with moderate compression
  • BitPackCompactor: Bit-packing for small label/state spaces
  • QuantizedCompactor: Weight quantization for lossy compression
  • DeltaCompactor: Delta encoding for sequential patterns
  • VarIntCompactor: Variable-length integer encoding

Implementations§

Source§

impl<W: Semiring, C: Compactor<W>> CompactFst<W, C>

Source

pub fn new() -> Self
where C: Default,

Create a new empty compact FST

Initializes an empty CompactFst with the default compactor strategy. The FST will use the compactor to compress arcs and weights as they are added to the structure.

§Examples
use arcweight::prelude::*;
use arcweight::fst::{CompactFst, DefaultCompactor};

// Create an empty compact FST with default compression
let fst = CompactFst::<TropicalWeight, DefaultCompactor<TropicalWeight>>::new();

// FST is initially empty
assert_eq!(fst.num_states(), 0);
assert!(fst.start().is_none());
§Performance

This operation is O(1) and allocates minimal memory for the initial empty state and data vectors.

Source

pub fn with_compactor(compactor: C) -> Self

Create a new compact FST with a specific compactor configuration

This constructor allows specification of the compactor strategy to use. Note that the compactor parameter is used only for type specification since the current Compactor trait is stateless.

§Examples
use arcweight::prelude::*;
use arcweight::fst::{CompactFst, BitPackCompactor, QuantizedCompactor, QuantizationMode};

// Create with bit-packing compactor
let bit_packed = CompactFst::with_compactor(BitPackCompactor::<TropicalWeight>::new(8, 8, 16));

// Create with quantized compactor
let quantized = CompactFst::with_compactor(
    QuantizedCompactor::<TropicalWeight>::new(
        QuantizationMode::Linear { min: 0.0, max: 100.0 },
        256
    )
);
Source

pub fn from_fst<F: Fst<W>>(fst: &F) -> Self
where C: Default,

Convert a VectorFst to a CompactFst with compression

Creates a new CompactFst by compressing all arcs and weights from the source FST. This is the primary way to create a compressed FST from existing data.

§Examples
use arcweight::prelude::*;
use arcweight::fst::{CompactFst, DefaultCompactor};

// Create a VectorFst
let mut vector_fst = VectorFst::<TropicalWeight>::new();
let s0 = vector_fst.add_state();
let s1 = vector_fst.add_state();
vector_fst.set_start(s0);
vector_fst.set_final(s1, TropicalWeight::one());
vector_fst.add_arc(s0, Arc::new(1, 1, TropicalWeight::new(0.5), s1));

// Convert to CompactFst
let compact_fst = CompactFst::<TropicalWeight, DefaultCompactor<TropicalWeight>>::from_fst(&vector_fst);

// Verify same structure
assert_eq!(compact_fst.num_states(), vector_fst.num_states());
assert_eq!(compact_fst.start(), vector_fst.start());
§Performance
  • Time Complexity: O(V + E) where V = states, E = arcs
  • Space Complexity: O(V + E) for compressed storage
  • Compression Ratio: Depends on compactor strategy and data characteristics
Source

pub fn set_final_weight(&mut self, state: StateId, weight: Option<W>)

Helper method to set a final weight for a state

This helper exists primarily for testing and prototyping. In normal use, final weights are set through the MutableFst implementation or captured during the compression process when converting from a source FST.

Source

pub fn add_state(&mut self) -> StateId

Helper method to add a state (for testing purposes)

Source§

impl<W: Semiring, C: Compactor<W>> CompactFst<W, C>

Source

pub fn compression_ratio(&self) -> f64

Get compression ratio as a diagnostic metric

Returns the ratio of compressed size to estimated uncompressed size. Lower values indicate better compression efficiency.

Source

pub fn force_recompress(&mut self)

Force immediate recompression

This method bypasses the adaptive triggering and immediately performs a full recompression of the FST data. Useful for optimizing before long-running read-heavy operations.

Source

pub fn enable_adaptive_compression(&mut self, config: AdaptiveConfig)

Enable adaptive compression with streaming support

This method configures the FST for adaptive compression that automatically selects the best compression strategy based on data characteristics and supports streaming operations for very large datasets.

§Adaptive Compression Features
  • Dynamic Strategy Selection: Chooses optimal compactor based on data patterns
  • Performance Monitoring: Tracks compression ratio and access patterns
  • Streaming Support: Handles datasets larger than memory through chunks
  • Memory Management: Automatic cache eviction and memory pressure handling
§Arguments
  • config - Configuration for adaptive compression behavior
§Examples
let mut fst = CompactFst::<TropicalWeight, DefaultCompactor<TropicalWeight>>::new();

let config = AdaptiveConfig {
    enable_streaming: true,
    memory_limit: 100_000_000, // 100MB
    compression_threshold: 0.7,
    analysis_window: 1000,
};

fst.enable_adaptive_compression(config);
Source

pub fn enable_streaming(&mut self, config: StreamingConfig)

Enable streaming compression for very large datasets

This method configures the FST to handle datasets that exceed available memory by processing data in chunks and using external storage when needed.

§Streaming Features
  • Chunk Processing: Processes large FSTs in manageable chunks
  • External Storage: Uses temporary files for intermediate results
  • Memory Pressure Handling: Automatically manages memory usage
  • Progress Tracking: Provides callbacks for long-running operations
§Arguments
  • config - Configuration for streaming behavior
§Examples
let mut fst = CompactFst::<TropicalWeight, DefaultCompactor<TropicalWeight>>::new();

let config = StreamingConfig {
    chunk_size: 10000,
    temp_dir: "/tmp/arcweight".to_string(),
    memory_limit: Some(500_000_000), // 500MB
    progress_callback: None,
};

fst.enable_streaming(config);
Source

pub fn analyze_compression_patterns(&self) -> CompressionAnalysis

Analyze data patterns and recommend optimal compression strategy

This method examines the current FST data to determine which compression strategy would be most effective, considering both compression ratio and access performance.

§Returns

A CompressionAnalysis struct containing recommendations and statistics

§Examples
let fst = CompactFst::<TropicalWeight, DefaultCompactor<TropicalWeight>>::new();
let analysis = fst.analyze_compression_patterns();

println!("Recommended strategy: {:?}", analysis.recommended_strategy);
println!("Expected compression ratio: {:.2}", analysis.expected_ratio);
Source

pub fn stream_construct<I>(&mut self, input_stream: I, config: StreamingConfig)
where I: Iterator<Item = Arc<W>>,

Stream large FST construction with memory management

This method allows constructing very large FSTs by processing input data in streams, automatically managing memory pressure and using external storage when needed.

§Arguments
  • input_stream - Iterator over input arcs or states
  • config - Streaming configuration
§Examples
let mut fst = CompactFst::<TropicalWeight, DefaultCompactor<TropicalWeight>>::new();

let config = StreamingConfig::default();
let large_input = (0..100).map(|i| {
    Arc::new(i, i, TropicalWeight::one(), i + 1)
});

fst.stream_construct(large_input, config);
Source§

impl<W: Semiring, C: Compactor<W>> CompactFst<W, C>

Source

pub fn expanded_arcs(&self, state: StateId) -> Vec<Arc<W>>

Get expanded arcs for a state as owned Vec

This method provides ExpandedFst-like functionality by returning an owned vector of expanded arcs. While this involves copying, it avoids the lifetime complications of maintaining cached references.

§Arguments
  • state - The state ID to get arcs for
§Returns

A vector containing all expanded arcs from the specified state

§Examples
let fst = CompactFst::<TropicalWeight, DefaultCompactor<TropicalWeight>>::new();
let arcs = fst.expanded_arcs(0);
for arc in &arcs {
    println!("Arc: {} -> {} / {}", arc.ilabel, arc.olabel, arc.weight);
}
Source

pub fn expanded_arcs_cached(&self, state: StateId) -> Vec<Arc<W>>

Get expanded arcs with caching for performance

This method implements a simple state-level cache to avoid repeated decompression of the same state’s arcs. The cache is implemented using interior mutability patterns.

Note: This is a conceptual implementation. A full implementation would use more complex caching strategies with eviction policies.

Source

pub fn prefetch_arcs<I>(&self, states: I)
where I: IntoIterator<Item = StateId>,

Prefetch and cache arcs for multiple states

This method proactively decompresses and caches arcs for multiple states to improve performance of subsequent accesses. Useful for algorithms that will access many states in sequence.

§Arguments
  • states - Iterator of state IDs to prefetch
§Examples
let fst = CompactFst::<TropicalWeight, DefaultCompactor<TropicalWeight>>::new();

// Prefetch arcs for states 0-9
fst.prefetch_arcs(0..10);

// Subsequent accesses to these states will be faster
for state in 0..10 {
    let arcs = fst.expanded_arcs(state);
    // Process arcs...
}
Source

pub fn clear_arc_cache(&self)

Clear the arc expansion cache

This method clears any cached expanded arcs to free memory. Useful for memory management in long-running applications.

Source

pub fn cache_stats(&self) -> CacheStats

Get cache statistics for monitoring and optimization

Returns information about cache performance including hit rates, memory usage, and eviction statistics.

Source

pub fn set_prefetching(&mut self, _enabled: bool)

Enable or disable smart prefetching based on access patterns

When enabled, the FST will analyze access patterns and proactively decompress arcs for states that are likely to be accessed soon.

Source

pub fn batch_expand_arcs(&self, states: &[StateId]) -> HashMap<StateId, Vec<Arc<W>>>

Batch decompress multiple states efficiently

This method processes multiple states together to amortize decompression overhead and enable vectorized operations.

§Arguments
  • states - Slice of state IDs to decompress
§Returns

HashMap mapping state IDs to their expanded arc vectors

Source

pub fn supports_efficient_expansion(&self) -> bool

Check if ExpandedFst functionality is efficiently supported

Returns true if this CompactFst instance can efficiently provide ExpandedFst operations, or false if operations will be slow due to compression overhead.

Trait Implementations§

Source§

impl<W: Clone + Semiring, C: Clone + Compactor<W>> Clone for CompactFst<W, C>
where C::Element: Clone,

Source§

fn clone(&self) -> CompactFst<W, C>

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl<W: Debug + Semiring, C: Debug + Compactor<W>> Debug for CompactFst<W, C>
where C::Element: Debug,

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl<W: Semiring, C: Compactor<W> + Default> Default for CompactFst<W, C>

Source§

fn default() -> Self

Returns the “default value” for a type. Read more
Source§

impl<W: Semiring, C: Compactor<W>> ExpandedFst<W> for CompactFst<W, C>

Implementation of ExpandedFst for CompactFst with on-demand decompression

This implementation provides direct access to arc slices while maintaining compression benefits through intelligent caching and decompression strategies. The FST transparently decompresses arcs when slice access is requested, caching results for subsequent accesses.

§On-Demand Decompression Strategy

  • Lazy Expansion: Arcs are decompressed only when arcs_slice() is called
  • State-Level Caching: Each state maintains a cache of its expanded arcs
  • Memory Management: Caches are evicted based on usage patterns and memory pressure
  • Prefetching: Related states may be pre-expanded based on access patterns

§Performance Characteristics

  • First Access: O(k) where k is the number of arcs (decompression cost)
  • Cached Access: O(1) direct slice access
  • Memory Usage: Compressed size + cache for accessed states
  • Cache Performance: Excellent for repeated traversals, good for algorithms requiring arc slices

§Use Cases

  • Algorithms requiring direct arc array access (sort, search, vectorized operations)
  • Frequent traversal of the same states
  • Performance-critical code that benefits from arc slice optimization
  • Compatibility with existing ExpandedFst-based algorithms
Source§

fn arcs_slice(&self, _state: StateId) -> &[Arc<W>]

Get a slice of arcs from a state
Source§

impl<W: Semiring, C: Compactor<W>> Fst<W> for CompactFst<W, C>

Source§

type ArcIter<'a> = CompactArcIterator<'a, W, C> where W: 'a, C: 'a

Arc iterator type
Source§

fn start(&self) -> Option<StateId>

Get the start state
Source§

fn final_weight(&self, state: StateId) -> Option<&W>

Get the final weight of a state
Source§

fn num_arcs(&self, state: StateId) -> usize

Get the number of arcs from a state
Source§

fn num_states(&self) -> usize

Get the number of states
Source§

fn properties(&self) -> FstProperties

Get properties of the FST
Source§

fn arcs(&self, state: StateId) -> Self::ArcIter<'_>

Create an iterator over arcs from a state
Source§

fn is_final(&self, state: StateId) -> bool

Check if a state is final
Source§

fn states(&self) -> impl Iterator<Item = StateId>

Iterate over all states
Source§

fn num_arcs_total(&self) -> usize

Get the total number of arcs
Source§

fn is_empty(&self) -> bool

Check if FST is empty
Source§

impl<W: Semiring, C: Compactor<W>> MutableFst<W> for CompactFst<W, C>

Implementation of MutableFst for CompactFst with dynamic recompression

This implementation allows CompactFst to be modified while maintaining compression benefits. When modifications are made, the FST intelligently recompresses data to maintain optimal space usage.

§Dynamic Recompression Strategy

  • Lazy Recompression: Modifications are batched and compressed periodically
  • Adaptive Triggers: Recompression occurs when efficiency drops below threshold
  • Incremental Updates: Small changes are applied without full recompression
  • Smart Caching: Frequently accessed data is kept uncompressed temporarily

§Performance Characteristics

  • Add Operations: O(1) amortized with batching, O(n) worst case during recompression
  • Memory Usage: May temporarily increase during modification, returns to compressed size
  • Recompression Cost: Proportional to modified data size, not entire FST
Source§

fn add_state(&mut self) -> StateId

Add a new state
Source§

fn add_arc(&mut self, state: StateId, arc: Arc<W>)

Add an arc
Source§

fn set_start(&mut self, state: StateId)

Set the start state
Source§

fn set_final(&mut self, state: StateId, weight: W)

Set final weight for a state
Source§

fn delete_arcs(&mut self, state: StateId)

Delete all arcs from a state
Source§

fn delete_arc(&mut self, state: StateId, arc_idx: usize)

Delete a single arc
Source§

fn reserve_states(&mut self, n: usize)

Reserve space for states
Source§

fn reserve_arcs(&mut self, _state: StateId, n: usize)

Reserve space for arcs from a state
Source§

fn clear(&mut self)

Clear the FST
Source§

fn remove_final(&mut self, state: StateId)

Remove final weight from a state

Auto Trait Implementations§

§

impl<W, C> Freeze for CompactFst<W, C>
where C: Freeze,

§

impl<W, C> RefUnwindSafe for CompactFst<W, C>

§

impl<W, C> Send for CompactFst<W, C>

§

impl<W, C> Sync for CompactFst<W, C>

§

impl<W, C> Unpin for CompactFst<W, C>
where C: Unpin, W: Unpin, <C as Compactor<W>>::Element: Unpin,

§

impl<W, C> UnwindSafe for CompactFst<W, C>
where C: UnwindSafe, W: UnwindSafe, <C as Compactor<W>>::Element: UnwindSafe,

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<W, F> PathIterExt<W> for F
where W: Semiring, F: Fst<W>,

Source§

fn paths_iter(&self) -> PathsIterator<'_, W, F>

Iterate over all accepting paths
Source§

fn string_paths_iter<'a>(&'a self, input_symbols: Option<&'a SymbolTable>, output_symbols: Option<&'a SymbolTable>) -> StringPathsIterator<'a, W, F>

Iterate over paths with string representations
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V