// rust-stemmers 1.2.0
//
// A Rust implementation of some popular Snowball stemming algorithms.
// Documentation
/*
Hungarian Stemmer
Removes noun inflections
*/

// Every routine defined in this file is declared here before use.
routines (
    mark_regions
    R1
    v_ending
    case
    case_special
    case_other
    plural
    owned
    sing_owner
    plur_owner
    instrum
    factive
    undouble
    double
)

// stem is the only routine exposed outside this file.
externals ( stem )

// p1 holds the cursor position at which region R1 begins; mark_regions sets it.
integers ( p1 )
// v is the vowel grouping, defined after the stringdef table.
groupings ( v )

// Inside string literals, a stringdef name is written between { and }.
stringescapes {}

/*
 * Special characters, given as hexadecimal Unicode code points.
 * Each name defined here is used inside string literals between the
 * braces set by stringescapes.
 */

stringdef a'  hex 'E1'  // a-acute, U+00E1
stringdef e'  hex 'E9'  // e-acute, U+00E9
stringdef i'  hex 'ED'  // i-acute, U+00ED
stringdef o'  hex 'F3'  // o-acute, U+00F3
stringdef o"  hex 'F6'  // o-umlaut, U+00F6
stringdef oq  hex '151' // o-double acute, U+0151
stringdef u'  hex 'FA'  // u-acute, U+00FA
stringdef u"  hex 'FC'  // u-umlaut, U+00FC
stringdef uq  hex '171' // u-double acute, U+0171

// v: the five plain vowels plus the nine accented vowels defined above.
define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'

/*
 * mark_regions: set p1, the position at which region R1 begins.
 *
 * p1 starts out at the end of the word, which leaves R1 empty. Then:
 *   - if the word begins with a vowel, move to the first non-vowel after
 *     it, step over that consonant (one of the listed multi-letter
 *     consonants when one matches there, otherwise a single character)
 *     and mark p1;
 *   - otherwise, if the word begins with a non-vowel, go past the first
 *     vowel and mark p1 there.
 * When neither alternative completes, p1 keeps its initial value.
 */
define mark_regions as (
    $p1 = limit

    (
        v
        goto non-v
        (among('cs' 'gy' 'ly' 'ny' 'sz' 'ty' 'zs' 'dzs') or next)
        setmark p1
    ) or (
        non-v
        gopast v
        setmark p1
    )
)

backwardmode (

    // R1: succeeds when the cursor is at or after p1. The suffix routines
    // call it right after [substring], when the cursor sits at the left end
    // of the matched suffix, so it checks that the whole suffix lies in R1.
    define R1 as $p1 <= cursor

    // v_ending: if the text ending at the cursor finishes, inside R1, with
    // a-acute or e-acute, replace that letter with plain a or e.
    // Fails when neither letter is found there or it lies outside R1.
    define v_ending as (
        [substring] R1 among(
            '{a'}' (<- 'a')
            '{e'}' (<- 'e')
        )
    )

    // double: succeeds when the text ending at the cursor is one of the
    // doubled consonants below (two- and three-letter spellings). It is
    // wrapped in test, so the cursor is left where it was.
    define double as (
        test among(
            'bb'
            'cc' 'ccs'
            'dd'
            'ff'
            'gg' 'ggy'
            'jj'
            'kk'
            'll' 'lly'
            'mm'
            'nn' 'nny'
            'pp'
            'rr'
            'ss' 'ssz'
            'tt' 'tty'
            'vv'
            'zz' 'zzs'
        )
    )

    // undouble: delete the character that sits one place before the last
    // character ending at the cursor. next steps back over the last
    // character, [hop 1] selects the single character before it as the
    // slice, and delete removes it (so 'bb' becomes 'b', 'ggy' becomes 'gy').
    // The callers in this file use it after double has matched and the
    // suffix has been deleted.
    define undouble as (
        next [hop 1] delete
    )

    // instrum: when the word ends in 'al' or 'el' inside R1 and the text
    // just before that suffix is a doubled consonant, delete the suffix and
    // then reduce the doubled consonant with undouble.
    define instrum as (
        [substring] R1 among(
            'al' 'el' (double)
        )
        delete
        undouble
    )


    /*
     * case: delete the longest suffix from the table below that lies in R1,
     * then run v_ending on what is left.
     * No entry has an action of its own; the delete that follows the among
     * applies to whichever entry matched.
     */
    define case as (
        [substring] R1 among(
            // single consonants
            'n' 't'

            // vowel + n, vowel + t
            'an' 'en' 'on' '{o"}n'
            'at' 'et' 'ot' '{o"}t'

            // other two-letter entries
            'ba' 'be'
            'ra' 're'
            'ig'
            'ul' '{u"}l'
            'v{a'}' 'v{e'}'

            // three-letter entries
            'ban' 'ben'
            'nak' 'nek'
            'val' 'vel'
            'kor'
            'hoz' 'hez' 'h{o"}z'
            'n{a'}l' 'n{e'}l'
            't{o'}l' 't{oq}l'
            'r{o'}l' 'r{oq}l'
            'b{o'}l' 'b{oq}l'
            '{e'}rt'

            // longer entries
            'k{e'}nt' 'k{e'}pp' 'k{e'}ppen'
            'ank{e'}nt' 'enk{e'}nt' 'onk{e'}nt'
        )
        delete
        v_ending
    )

    // case_special: suffixes in R1 that begin with e-acute or a-acute are
    // replaced by the plain vowel alone, so the rest of the suffix is dropped.
    define case_special as (
        [substring] R1 among(
            '{e'}n' (<- 'e')
            '{a'}n' '{a'}nk{e'}nt' (<- 'a')
        )
    )

    // case_other: handles the stul and st + u-umlaut + l suffixes in R1.
    // Forms with no vowel or a plain vowel in front are deleted outright;
    // forms led by a-acute or e-acute are replaced by the plain vowel.
    define case_other as (
        [substring] R1 among(
            'stul' 'astul'
            'st{u"}l' 'est{u"}l' (delete)

            '{a'}stul' (<- 'a')
            '{e'}st{u"}l' (<- 'e')
        )
    )

    // factive: when the word ends in a-acute or e-acute inside R1 and the
    // text just before it is a doubled consonant, delete the vowel and then
    // reduce the doubled consonant with undouble.
    define factive as (
        [substring] R1 among(
            '{a'}' '{e'}' (double)
        )
        delete
        undouble
    )

    // plural: remove a k-final suffix in R1.
    define plural as (
        [substring] R1 among(
            // accented vowel + k: keep the vowel, unaccented
            '{a'}k' (<- 'a')
            '{e'}k' (<- 'e')

            // bare k, or k after a, e, o, o-umlaut: remove entirely
            'k'
            'ak' 'ek' 'ok' '{o"}k' (delete)
        )
    )

    // owned: remove a suffix in R1 that ends in e-acute, or in e-acute + i.
    // Entries are grouped by what replaces the matched suffix.
    define owned as (
        [substring] R1 among(
            // removed entirely
            '{e'}' '{e'}i' 'k{e'}'
            'ak{e'}' 'ek{e'}' 'ok{e'}' '{o"}k{e'}' (delete)

            // replaced by plain e
            '{e'}{e'}' '{e'}{e'}i' '{e'}k{e'}' (<- 'e')

            // replaced by plain a
            '{a'}{e'}i' '{a'}k{e'}' (<- 'a')
        )
    )

    // sing_owner: remove one of the suffixes below when it lies in R1.
    // Entries are grouped by what replaces the matched suffix: nothing,
    // plain a (entries led by a-acute) or plain e (entries led by e-acute).
    define sing_owner as (
        [substring] R1 among(
            // removed entirely
            'nk' 'unk' '{u"}nk'
            'uk' '{u"}k' 'juk' 'j{u"}k'
            'm' 'am' 'em' 'om'
            'd' 'ad' 'ed' 'od' '{o"}d'
            'ja' 'je'
            'a' 'e' 'o' (delete)

            // replaced by plain a
            '{a'}' '{a'}m' '{a'}d' '{a'}nk' '{a'}juk' (<- 'a')

            // replaced by plain e
            '{e'}' '{e'}m' '{e'}d' '{e'}nk' '{e'}j{u"}k' (<- 'e')
        )
    )

    // plur_owner: remove one of the i-based suffixes below when it lies in
    // R1. Entries are grouped by what replaces the matched suffix: nothing,
    // plain a (entries led by a-acute) or plain e (entries led by e-acute).
    define plur_owner as (
        [substring] R1 among(
            // removed entirely
            'i' 'ai' 'ei' 'jai' 'jei'
            'im' 'aim' 'eim' 'jaim' 'jeim'
            'id' 'aid' 'eid' 'jaid' 'jeid'
            'ik' 'aik' 'eik' 'jaik' 'jeik'
            'ink' 'aink' 'eink' 'jaink' 'jeink'
            'itek' 'aitok' 'eitek' 'jaitok' 'jeitek' (delete)

            // replaced by plain a
            '{a'}i' '{a'}im' '{a'}id' '{a'}ik' '{a'}ink' '{a'}itok' (<- 'a')

            // replaced by plain e
            '{e'}i' '{e'}im' '{e'}id' '{e'}ik' '{e'}ink' '{e'}itek' (<- 'e')
        )
    )
)

/*
 * stem: the exported entry point.
 * Sets the R1 boundary with mark_regions, then works backwards from the end
 * of the word and tries each suffix routine once, in the fixed order below.
 * Every step is wrapped in do, so a routine that does not apply neither
 * stops the sequence nor leaves the cursor moved.
 */
define stem as (
    do mark_regions

    backwards (
        do instrum
        do case
        do case_special
        do case_other
        do factive
        do owned
        do sing_owner
        do plur_owner
        do plural
    )
)