// braillify 2.0.0
//
// Rust 기반 크로스플랫폼 한국어 점역 라이브러리
// (A Rust-based, cross-platform Korean braille translation library.)
// Documentation
use super::context::EncoderState;
use super::token::Token;
use super::token_rule::{TokenAction, TokenPhase, TokenRule};

/// Owns a set of boxed [`TokenRule`]s and applies them to a token list.
///
/// Rules are kept in registration order until they are needed; they are then
/// sorted once by `(phase, priority)` and the order is reused until another
/// rule is registered.
pub struct TokenRuleEngine {
    /// Registered rules. Ordered by `(phase, priority)` only while `sorted` is `true`.
    rules: Vec<Box<dyn TokenRule>>,
    /// `false` after `register` pushes a rule; set back to `true` by `ensure_sorted`.
    sorted: bool,
}

impl TokenRuleEngine {
    /// Creates an engine with no registered rules.
    pub fn new() -> Self {
        Self {
            rules: Vec::new(),
            sorted: false,
        }
    }

    /// Adds `rule` to the engine.
    ///
    /// The rule list is marked as unsorted so that the next
    /// [`apply_all`](Self::apply_all) call re-establishes the
    /// `(phase, priority)` order before running anything.
    pub fn register(&mut self, rule: Box<dyn TokenRule>) {
        self.rules.push(rule);
        self.sorted = false;
    }

    /// Sorts the rules by `(phase, priority)` if a registration invalidated the order.
    ///
    /// The sort is stable, so rules with an identical key keep their
    /// registration order.
    fn ensure_sorted(&mut self) {
        if !self.sorted {
            self.rules.sort_by_key(|r| (r.phase() as u8, r.priority()));
            self.sorted = true;
        }
    }

    /// Applies every registered rule to `tokens`, one phase at a time.
    ///
    /// Phases run in the fixed order listed in the body. Within a phase the
    /// tokens are walked left to right, and at each index the rules of that
    /// phase are tried in `(phase, priority)` order. What happens next depends
    /// on the [`TokenAction`] a rule returns:
    ///
    /// * `Noop` — in the `Normalization` and `PostWord` phases the next rule
    ///   is tried; in every other phase the walk moves on to the next index.
    /// * `Replace` — the token is overwritten and the walk moves on.
    /// * `InsertBefore` (test builds only) — the tokens are inserted in front
    ///   of the current one and the walk resumes after the current token.
    /// * `ReplaceMany` — the token is replaced by the given sequence and the
    ///   walk resumes after the last inserted token. An empty sequence acts
    ///   like a removal.
    /// * `Remove` (test builds only) — the token is deleted.
    ///
    /// After a removal the token that slides into the vacated slot is
    /// processed at the same index by all rules of the phase, so no token is
    /// skipped and no rule is ever called with an out-of-range index.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by a rule. Changes already made to
    /// `tokens` and `state` before the failure are not rolled back.
    pub fn apply_all<'a>(
        &mut self,
        tokens: &mut Vec<Token<'a>>,
        state: &mut EncoderState,
    ) -> Result<(), String> {
        self.ensure_sorted();

        for phase in [
            TokenPhase::Normalization,
            TokenPhase::FractionDetection,
            TokenPhase::WordShortcut,
            TokenPhase::ModeEntry,
            TokenPhase::UppercasePassage,
            TokenPhase::PostWord,
        ] {
            let mut i = 0usize;

            while i < tokens.len() {
                // Cleared when the token at `i` was removed: the slot then
                // holds an unprocessed token (or is past the end) and must be
                // re-examined instead of stepped over.
                let mut advance = true;

                for rule in &self.rules {
                    if rule.phase() != phase {
                        continue;
                    }

                    match rule.apply(tokens, i, state)? {
                        TokenAction::Noop => {
                            // NOTE(review): only these two phases fall through
                            // to the next rule; in the others the first rule's
                            // `Noop` ends the work on this index. Kept as in
                            // the original — confirm this is intended.
                            if matches!(phase, TokenPhase::Normalization | TokenPhase::PostWord) {
                                continue;
                            }
                        }
                        TokenAction::Replace(t) => {
                            tokens[i] = t;
                        }
                        #[cfg(test)]
                        TokenAction::InsertBefore(ts) => {
                            let count = ts.len();
                            tokens.splice(i..i, ts);
                            // Point `i` back at the original token; the
                            // increment below then steps past it.
                            i += count;
                        }
                        TokenAction::ReplaceMany(ts) => {
                            let count = ts.len();
                            tokens.splice(i..=i, ts);
                            if count == 0 {
                                // Nothing was put back: this was a removal.
                                advance = false;
                            } else {
                                // Land on the last inserted token.
                                i += count - 1;
                            }
                        }
                        #[cfg(test)]
                        TokenAction::Remove => {
                            tokens.remove(i);
                            advance = false;
                        }
                    }
                    break;
                }

                if advance {
                    i += 1;
                }
            }
        }

        Ok(())
    }
}

impl Default for TokenRuleEngine {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use super::*;
    use crate::rules::token::{SpaceKind, WordMeta, WordToken};

    /// Returns `true` when `token` is a word token whose text equals `text`.
    fn is_word(token: Option<&Token<'_>>, text: &str) -> bool {
        matches!(token, Some(Token::Word(w)) if w.text == text)
    }

    /// Builds a word token borrowing the given static text.
    fn word_token(text: &'static str) -> Token<'static> {
        let chars: Vec<char> = text.chars().collect();
        // Derive the metadata first so `chars` can be moved into the token.
        let meta = WordMeta::from_chars(&chars);
        Token::Word(WordToken {
            text: Cow::Borrowed(text),
            chars,
            meta,
        })
    }

    /// Turns `words` into word tokens, runs `engine` over them with a fresh
    /// `EncoderState::new(false)` and returns the resulting token list.
    fn run(mut engine: TokenRuleEngine, words: &[&'static str]) -> Vec<Token<'static>> {
        let mut tokens: Vec<Token<'static>> = words.iter().copied().map(word_token).collect();
        let mut state = EncoderState::new(false);
        engine.apply_all(&mut tokens, &mut state).unwrap();
        tokens
    }

    /// Normalization rule: overwrites index 0 with `PreEncoded([9])`, no-op elsewhere.
    struct ReplaceWordAt0;
    impl TokenRule for ReplaceWordAt0 {
        fn phase(&self) -> TokenPhase {
            TokenPhase::Normalization
        }

        fn apply<'a>(
            &self,
            _tokens: &[Token<'a>],
            index: usize,
            _state: &mut EncoderState,
        ) -> Result<TokenAction<'a>, String> {
            Ok(match index {
                0 => TokenAction::Replace(Token::PreEncoded(vec![9])),
                _ => TokenAction::Noop,
            })
        }
    }

    /// PostWord rule: inserts a regular space in front of a word found at index 1.
    struct InsertSpaceBeforeSecond;
    impl TokenRule for InsertSpaceBeforeSecond {
        fn phase(&self) -> TokenPhase {
            TokenPhase::PostWord
        }

        fn apply<'a>(
            &self,
            tokens: &[Token<'a>],
            index: usize,
            _state: &mut EncoderState,
        ) -> Result<TokenAction<'a>, String> {
            match tokens.get(index) {
                Some(Token::Word(_)) if index == 1 => Ok(TokenAction::InsertBefore(vec![
                    Token::Space(SpaceKind::Regular),
                ])),
                _ => Ok(TokenAction::Noop),
            }
        }
    }

    /// PostWord rule: removes the word token whose text is `"b"`.
    struct RemoveWordB;
    impl TokenRule for RemoveWordB {
        fn phase(&self) -> TokenPhase {
            TokenPhase::PostWord
        }

        fn apply<'a>(
            &self,
            tokens: &[Token<'a>],
            index: usize,
            _state: &mut EncoderState,
        ) -> Result<TokenAction<'a>, String> {
            let action = if is_word(tokens.get(index), "b") {
                TokenAction::Remove
            } else {
                TokenAction::Noop
            };
            Ok(action)
        }
    }

    /// PostWord rule (priority 50): expands the word `"b"` into two pre-encoded tokens.
    struct ReplaceManyForB;
    impl TokenRule for ReplaceManyForB {
        fn phase(&self) -> TokenPhase {
            TokenPhase::PostWord
        }

        fn priority(&self) -> u16 {
            50
        }

        fn apply<'a>(
            &self,
            tokens: &[Token<'a>],
            index: usize,
            _state: &mut EncoderState,
        ) -> Result<TokenAction<'a>, String> {
            if !is_word(tokens.get(index), "b") {
                return Ok(TokenAction::Noop);
            }
            let expansion = vec![Token::PreEncoded(vec![1]), Token::PreEncoded(vec![2])];
            Ok(TokenAction::ReplaceMany(expansion))
        }
    }

    #[test]
    fn token_engine_sorts_and_applies_by_phase_priority() {
        // The PostWord rule is registered before the Normalization rule.
        let mut engine = TokenRuleEngine::new();
        engine.register(Box::new(InsertSpaceBeforeSecond));
        engine.register(Box::new(ReplaceWordAt0));

        let tokens = run(engine, &["a", "b"]);

        assert!(matches!(tokens.first(), Some(Token::PreEncoded(cells)) if *cells == vec![9]));
        assert!(matches!(
            tokens.get(1),
            Some(Token::Space(SpaceKind::Regular))
        ));
        assert!(matches!(tokens.get(2), Some(Token::Word(_))));
    }

    #[test]
    fn token_engine_insert_replace_remove_index_handling() {
        let mut engine = TokenRuleEngine::new();
        engine.register(Box::new(ReplaceWordAt0));
        engine.register(Box::new(RemoveWordB));

        let tokens = run(engine, &["a", "b", "c"]);

        assert_eq!(tokens.len(), 2);
        assert!(matches!(tokens.first(), Some(Token::PreEncoded(_))));
        assert!(is_word(tokens.get(1), "c"));
    }

    #[test]
    fn token_engine_replace_many_updates_index_safely() {
        let mut engine = TokenRuleEngine::new();
        engine.register(Box::new(ReplaceManyForB));

        let tokens = run(engine, &["a", "b", "c"]);

        assert_eq!(tokens.len(), 4);
        assert!(is_word(tokens.first(), "a"));
        assert!(matches!(tokens.get(1), Some(Token::PreEncoded(cells)) if *cells == vec![1]));
        assert!(matches!(tokens.get(2), Some(Token::PreEncoded(cells)) if *cells == vec![2]));
        assert!(is_word(tokens.get(3), "c"));
    }
}