strizer 0.1.0

minimal and fast library for text tokenization
Documentation
  • Coverage
  • 81.25%
    13 out of 16 items documented; 4 out of 12 items with examples
  • Size
  • Source code size: 21.76 kB. This is the summed size of all the files inside the crates.io package for this release.
  • Documentation size: 5.18 MB. This is the summed size of all files generated by rustdoc for all configured targets.
  • Links
  • Homepage
  • aleics/strizer
    1 0 0
  • crates.io
  • Dependencies
  • Versions
  • Owners
  • aleics

strizer

CI

strizer is a minimal and fast library for text tokenization.

Usage

Install

Add this to your Cargo.toml:

[dependencies]
strizer = "0.1.0"

StreamTokenizer

use std::fs::File;
use std::io::BufReader;
use strizer::{StreamTokenizer, Token, TokenKind};

fn main() -> std::io::Result<()> {
  // open "log.txt" and wrap its contents in a buffered reader;
  // a failure to open the file is propagated to the caller via `?`
  let mut reader = BufReader::new(File::open("log.txt")?);

  // tokenize the buffered reader, keep only the tokens whose text
  // equals "ERROR", and count how many there are
  let tokenizer = StreamTokenizer::new(&mut reader, &[]);
  let error_count = tokenizer
    .filter(|(_, _, slice)| slice == "ERROR")
    .count();

  println!("number of error logs: {}", error_count);
  Ok(())
}

StringTokenizer

use strizer::StringTokenizer;

fn main() -> std::io::Result<()> {
  // tokenize the input string, then count the number of tokens produced
  let tokenizer = StringTokenizer::new("hello world", &[]);
  let word_count = tokenizer.count();

  println!("number of words: {}", word_count);
  Ok(())
}

License

MIT