rustling 0.2.0

A library for computational linguistics
Documentation

rustling

Rust crates.io

Python PyPI Supported Python versions

rustling is a library of tools for computational linguistics, implemented in Rust with Python bindings.

Features

  • Word Segmentation — Models for segmenting unsegmented text into words
    • LongestStringMatching — Greedy left-to-right longest-match segmenter
    • RandomSegmenter — Random baseline segmenter

Installation

Python

pip install rustling

Rust

cargo add rustling

Usage

Python

from rustling.wordseg import LongestStringMatching, RandomSegmenter

# Longest String Matching
# The model is fitted on sentences that are already split into words,
# then asked to segment new strings that contain no word boundaries.
training_sentences = [
    ("this", "is", "a", "sentence"),
    ("that", "is", "not", "a", "sentence"),
]
lsm = LongestStringMatching(max_word_length=4)
lsm.fit(training_sentences)
lsm_output = lsm.predict(["thatisadog", "thisisnotacat"])
print(lsm_output)
# [['that', 'is', 'a', 'd', 'o', 'g'], ['this', 'is', 'not', 'a', 'c', 'a', 't']]

# Random Segmenter (no training needed)
random_segmenter = RandomSegmenter(prob=0.3)
random_output = random_segmenter.predict(["helloworld"])
print(random_output)
# e.g., [['hel', 'lo', 'wor', 'ld']] (varies due to randomness)

Rust

use rustling::wordseg::{LongestStringMatching, RandomSegmenter};

/// Runs both word segmenters on sample input and prints their output.
///
/// Both constructors are unwrapped, so the example panics if either
/// segmenter cannot be built from the given argument.
fn main() {
    // Longest String Matching
    // Training data: sentences that are already split into words.
    let training_sentences = vec![
        vec!["this".into(), "is".into(), "a".into(), "sentence".into()],
        vec!["that".into(), "is".into(), "not".into(), "a".into(), "sentence".into()],
    ];
    // Input to segment: strings with no word boundaries.
    let unsegmented = vec!["thatisadog".into(), "thisisnotacat".into()];

    let mut lsm = LongestStringMatching::new(4).unwrap();
    lsm.fit(training_sentences);
    let lsm_output = lsm.predict(unsegmented);
    println!("{:?}", lsm_output);
    // [["that", "is", "a", "d", "o", "g"], ["this", "is", "not", "a", "c", "a", "t"]]

    // Random Segmenter (no training needed)
    let random_segmenter = RandomSegmenter::new(0.3).unwrap();
    let random_input = vec!["helloworld".into()];
    let random_output = random_segmenter.predict(random_input);
    println!("{:?}", random_output);
    // e.g., [["hel", "lo", "wor", "ld"]] (varies due to randomness)
}

License

MIT License

Links