// rust-stemmers 1.2.0
//
// A Rust implementation of some popular Snowball stemming algorithms.
// Documentation
// Integer variables.  p1 and p2 are assigned by mark_regions and are read
// by the R1 and R2 tests (and by Step_1b via 'atmark p1').
integers ( p1 p2 )
// Flag set by prelude whenever it rewrites a 'y' as 'Y'; postlude only
// restores 'Y' to 'y' when the flag is set.
booleans ( Y_found )

// Every internal routine defined in this file must be declared here.
routines (
    prelude postlude
    mark_regions
    shortv
    R1 R2
    Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5
    exception1
    exception2
)

// The single routine exposed to users of the generated stemmer.
externals ( stem )

// Character classes; their members are given by the 'define' lines that
// follow these declarations.
groupings ( v v_WXY valid_LI )

// Use '{' and '}' as the escape delimiters inside string literals, so the
// literal '{'}' used in this file stands for one apostrophe character.
stringescapes {}

// v: the vowels.  Lowercase 'y' is a vowel; the uppercase 'Y' marker that
// prelude may introduce is not a member, so it tests as a non-vowel.
define v        'aeiouy'
// v_WXY: the vowels plus 'w', 'x' and 'Y'.  Used (negated) by shortv.
define v_WXY    v + 'wxY'

// valid_LI: the letters that must immediately precede a final 'li' for
// Step_2 to delete that 'li'.
define valid_LI 'cdeghkmnrt'

// prelude: forward-mode preparation of the word.
//   - clears Y_found;
//   - deletes an apostrophe found at the cursor;
//   - rewrites a 'y' at the cursor, and every 'y' that directly follows a
//     vowel, as 'Y', setting Y_found each time.
// Because 'Y' is not in the grouping v, the rewritten letters are treated
// as non-vowels by the later steps.  Each line is wrapped in 'do', so a
// line that fails leaves the cursor where it was and does not make
// prelude fail.
define prelude as (
    unset Y_found
    do ( ['{'}'] delete)
    do ( ['y'] <-'Y' set Y_found)
    do repeat(goto (v ['y']) <-'Y' set Y_found)
)

// mark_regions: assigns p1 and p2, the positions tested by R1 and R2.
// Both start out equal to 'limit' and keep that value if the searches
// inside the 'do' fail part-way through.
define mark_regions as (
    $p1 = limit
    $p2 = limit
    do(
        // p1 is marked just after one of the listed strings when the word
        // has one at the cursor; otherwise just after the first non-vowel
        // that follows a vowel.
        among (
            'gener'
            'commun'  //  added May 2005
            'arsen'   //  added Nov 2006 (arsenic/arsenal)
            // ... extensions possible here ...
        ) or (gopast v  gopast non-v)
        setmark p1
        // p2 is found by repeating the vowel / non-vowel search from p1.
        gopast v  gopast non-v  setmark p2
    )
)

backwardmode (

    // shortv: backward-mode test for a short syllable ending at the cursor.
    // Reading leftwards from the cursor it accepts either
    //   - a non-vowel other than 'w', 'x' or 'Y', then a vowel, then a
    //     non-vowel; or
    //   - a non-vowel, then a vowel, with nothing further to the left
    //     (atlimit is the start of the word in backward mode).
    define shortv as (
        ( non-v_WXY v non-v )
        or
        ( non-v v atlimit )
    )

    // R1 / R2: succeed when the cursor is at or to the right of p1 / p2,
    // the positions recorded by mark_regions.
    define R1 as $p1 <= cursor
    define R2 as $p2 <= cursor

    // Step_1a: backward-mode handling of apostrophe and 's' endings.
    define Step_1a as (
        // Optionally remove a trailing ', 's or 's' (the longest that
        // matches).  'try' keeps the step going when none is present.
        try (
            [substring] among (
                '{'}' '{'}s' '{'}s{'}'
                       (delete)
            )
        )
        // Then act on the longest of the following endings.
        [substring] among (
            'sses' (<-'ss')
            // 'ied' / 'ies': becomes 'i' when at least two characters
            // precede the suffix (hop 2 succeeds), otherwise 'ie'.
            'ied' 'ies'
                   ((hop 2 <-'i') or <-'ie')
            // bare 's': step over the letter before the 's', then require
            // a vowel somewhere further left before deleting the 's'.
            's'    (next gopast v delete)
            // No action: listed so that these endings win the longest
            // match and the bare 's' rule above is not applied to them.
            'us' 'ss'
        )
    )

    // Step_1b: backward-mode handling of the endings eed, eedly, ed, edly,
    // ing and ingly.
    define Step_1b as (
        [substring] among (
            // Replaced by 'ee' only when the suffix lies within R1.
            'eed' 'eedly'
                (R1 <-'ee')
            'ed' 'edly' 'ing' 'ingly'
                (
                // Delete the suffix only if a vowel occurs before it.
                // 'test' restores the cursor after the vowel search.
                test gopast v  delete
                // Tidy up the new ending.  'test' covers only 'substring',
                // so the actions below run with the cursor restored to
                // where it was before the match.
                test substring among(
                    'at' 'bl' 'iz'
                         (<+ 'e')
                    'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
                    // ignoring double c, h, j, k, q, v, w, and x
                         ([next]  delete)
                    // The empty string matches when nothing above does:
                    // insert 'e' if the cursor is exactly at p1 and the
                    // word ends in a short syllable.
                    ''   (atmark p1  test shortv  <+ 'e')
                )
            )
        )
    )

    // Step_1c: backward mode.  Replace a final 'y' or 'Y' with 'i' when
    // the letter before it is a non-vowel and that non-vowel is not the
    // first letter of the word ('not atlimit' after stepping over it).
    define Step_1c as (
        ['y' or 'Y']
        non-v not atlimit
        <-'i'
    )

    // Step_2: backward mode.  The longest listed suffix at the end of the
    // word is bracketed; if it starts at or after p1 (the R1 test) it is
    // rewritten as shown.  Entries are grouped by their replacement text;
    // the order of entries does not affect which suffix is chosen.
    define Step_2 as (
        [substring] R1 among (

            /* plain replacements */

            'ational' 'ation' 'ator'
                (<-'ate')
            'tional'
                (<-'tion')
            'enci'
                (<-'ence')
            'anci'
                (<-'ance')
            'abli'
                (<-'able')
            'biliti' 'bli'
                (<-'ble')
            'entli'
                (<-'ent')
            'izer' 'ization'
                (<-'ize')
            'alism' 'aliti' 'alli'
                (<-'al')
            'fulness' 'fulli'
                (<-'ful')
            'ousli' 'ousness'
                (<-'ous')
            'iveness' 'iviti'
                (<-'ive')
            'lessli'
                (<-'less')

            /* rules that also check the letter before the suffix */

            'ogi'
                ('l' <-'og')          // only when preceded by 'l'
            'li'
                (valid_LI delete)     // only when preceded by a valid_LI letter
        )
    )

    // Step_3: backward mode.  As in Step_2, the longest listed suffix is
    // bracketed and must start at or after p1 (R1) before its action runs.
    define Step_3 as (
        [substring] R1 among (

            /* replacements */

            'ational'
                (<-'ate')
            'tional'
                (<-'tion')
            'alize'
                (<-'al')
            'icate' 'iciti' 'ical'
                (<-'ic')

            /* removals */

            'ness' 'ful'
                (delete)
            'ative'
                (R2 delete)  // 'R2' added Dec 2001: removed only when also in R2
        )
    )

    // Step_4: backward mode.  Removes the longest listed suffix, provided
    // it starts at or after p2 (the R2 test).
    define Step_4 as (
        [substring] R2 among (

            // 'ion' is removed only when the letter before it is 's' or 't'.
            'ion'
                ('s' or 't' delete)

            // Everything else is removed unconditionally (alphabetical).
            'able' 'al' 'ance' 'ant' 'ate'
            'ement' 'ence' 'ent' 'er'
            'ible' 'ic' 'ism' 'iti' 'ive' 'ize'
            'ment' 'ous'
                (delete)
        )
    )

    // Step_5: backward mode.  Looks at the final letter of the word.
    define Step_5 as (
        [substring] among (

            // Final 'l': removed when it is in R2 and the letter before it
            // is also 'l'.
            'l'
                (R2 'l' delete)

            // Final 'e': removed when it is in R2, or when it is in R1 and
            // what precedes it is not a short syllable.
            'e'
                (R2 or (R1 not shortv) delete)
        )
    )

    // exception2: backward mode.  Succeeds, without changing the word,
    // when the whole word is one of the listed forms: the match is taken
    // at the end of the word and 'atlimit' then requires the cursor to
    // have reached the start.
    define exception2 as (
        [substring] atlimit among(

            /* -ing forms */
            'canning' 'earring' 'herring'
            'inning' 'outing'

            /* -ceed forms */
            'exceed' 'proceed' 'succeed'

            // ... extensions possible here ...
        )
    )
)

// exception1: forward mode.  Succeeds when the whole word is one of the
// listed forms (the match starts at the cursor and 'atlimit' requires it
// to reach the end of the word).  Listed words with an action are
// replaced by the given stem; the others are left as they are.
define exception1 as (
    [substring] atlimit among(

        /* special -LY cases */

        'early'     (<-'earli')
        'gently'    (<-'gentl')
        'idly'      (<-'idl')
        'only'      (<-'onli')
        'singly'    (<-'singl')
        'ugly'      (<-'ugli')

        /* special changes: */

        'dying'     (<-'die')
        'lying'     (<-'lie')
        'tying'     (<-'tie')
        'skies'     (<-'sky')
        'skis'      (<-'ski')

        // ... extensions possible here ...

        /* invariant forms: no action, so they must stay after every
           entry that has one */

        'andes' 'atlas' 'bias' 'cosmos' // not plural forms

        'howe'
        'news'
        'sky'

        // ... extensions possible here ...
    )
)

// postlude: when Y_found is set (prelude rewrote at least one 'y' as 'Y'),
// find each remaining 'Y' in turn and change it back to 'y'.  Fails
// immediately, doing nothing, when Y_found is not set.
define postlude as (Y_found  repeat(goto (['Y']) <-'y'))

// stem: the exported entry point.  The three alternatives joined by 'or'
// are tried in order and the first one that succeeds ends the routine:
//   1. exception1 - the word is a listed special case;
//   2. not hop 3  - the word has fewer than three characters and is left
//                   unchanged;
//   3. the main algorithm below.
define stem as (

    exception1 or
    not hop 3 or (
        // Each stage is wrapped in 'do' so that a stage which does not
        // apply neither aborts the sequence nor moves the cursor.
        do prelude
        do mark_regions
        // The suffix steps run from the end of the word towards the start.
        backwards (

            do Step_1a

            // Words listed in exception2 skip all of the remaining steps.
            exception2 or (

                do Step_1b
                do Step_1c

                do Step_2
                do Step_3
                do Step_4

                do Step_5
            )
        )
        do postlude
    )
)