Crate markov_str

source ·
Expand description

Fast and memory efficient Markov Chain implementation, optimized for text generation

§Example

use markov_str::*;
use regex::Regex;
use std::fs::{self, read_to_string};

let training_path = "data";

// Gets the paths of evey file and directory in the training_path.
let tpaths = fs::read_dir(training_path)
    .unwrap_or_else(|_| panic!("Can't read files from: {}", training_path));

// Only the files remain
let files = tpaths
    .filter_map(|f| f.ok())
    .filter(|f| match f.file_type() {
        Err(_) => false,
        Ok(f) => f.is_file(),
    });

// Reads every file into a string
let contents = files.filter_map(|f| read_to_string(f.path()).ok());

// Creating the Markov Chain
let markov_chain = contents.fold(
    MarkovChain::with_capacity(2, 8_000_000, Regex::new(WORD_REGEX).unwrap()),
    |mut a, s| {
        a.add_text(&s);
        a
    },
);

// Number of tokens
println!("{}", markov_chain.len());

// Generation
for _ in 0..10 {
    println!("{}", markov_chain.generate_start("among the       ", 25).unwrap());
}

This example is taken from the src/main.rs, you can run it by:

./get_data.sh
cargo run --release

./get_data.sh will download the first 200 books from Project Gutenberg, which totals up to more than 100MBs of text.

§License

markov_str is licensed under the MIT license. Feel free to fork and use however you like.

Structs§

  • Represents a Markov Chain that is designed to generate text.

Statics§