// tendril 0.4.1
//
// Compact buffer/string type for zero-copy parsing
// Documentation
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::collections::hash_map::{HashMap, Entry};
use std::borrow::ToOwned;

use tendril::StrTendril;

fn index_words_string(input: &String) -> HashMap<char, Vec<String>> {
    let mut index = HashMap::new();
    for word in input.split(|c| c == ' ') {
        if word.len() == 0 {
            continue;
        }
        let word = word.to_owned();
        match index.entry(word.chars().next().unwrap()) {
            Entry::Occupied(mut e) => {
                let x: &mut Vec<String> = e.get_mut();
                x.push(word);
            }
            Entry::Vacant(e) => { e.insert(vec![word]); }
        }
    }
    index
}

fn index_words_tendril(input: &StrTendril) -> HashMap<char, Vec<StrTendril>> {
    let mut index = HashMap::new();
    let mut t = input.clone();
    loop {
        match t.pop_front_char_run(|c| c != ' ') {
            None => return index,
            Some((_, false)) => (),
            Some((word, true)) => match index.entry(word.chars().next().unwrap()) {
                Entry::Occupied(mut e) => { e.get_mut().push(word); }
                Entry::Vacant(e) => { e.insert(vec![word]); }
            }
        }
    }
}

static EN_1: &'static str
    = "Days turn to nights turn to paper into rocks into plastic";

static EN_2: &'static str
    = "Here the notes in my laboratory journal cease. I was able to write the last \
       words only with great effort. By now it was already clear to me that LSD had \
       been the cause of the remarkable experience of the previous Friday, for the \
       altered perceptions were of the same type as before, only much more intense. I \
       had to struggle to speak intelligibly. I asked my laboratory assistant, who was \
       informed of the self-experiment, to escort me home. We went by bicycle, no \
       automobile being available because of wartime restrictions on their use. On the \
       way home, my condition began to assume threatening forms. Everything in my \
       field of vision wavered and was distorted as if seen in a curved mirror. I also \
       had the sensation of being unable to move from the spot. Nevertheless, my \
       assistant later told me that we had traveled very rapidly. Finally, we arrived \
       at home safe and sound, and I was just barely capable of asking my companion to \
       summon our family doctor and request milk from the neighbors.\n\n\
       In spite of my delirious, bewildered condition, I had brief periods of clear \
       and effective thinking—and chose milk as a nonspecific antidote for poisoning.";

static KR_1: &'static str
    = "러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, 메모리-안전하고 병렬 \
       프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. 아직 \
       개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.";

static HTML_KR_1: &'static str
    = "<p>러스트(<a href=\"http://rust-lang.org\">Rust</a>)는 모질라(<a href=\"\
       https://www.mozilla.org/\">mozilla.org</a>)에서 개발하고 있는, \
       메모리-안전하고 병렬 프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. \
       아직 개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.</p>";

mod index_words {
    macro_rules! bench {
        ($txt:ident) => {
            #[allow(non_snake_case)]
            mod $txt {
                const SMALL_SIZE: usize = 65536;
                const LARGE_SIZE: usize = (1 << 20);

                #[bench]
                fn index_words_string(b: &mut ::test::Bencher) {
                    let mut s = String::new();
                    while s.len() < SMALL_SIZE {
                        s.push_str(::tendril::bench::$txt);
                    }
                    b.iter(|| {
                        ::tendril::bench::index_words_string(&s)
                    });
                }

                #[bench]
                fn index_words_tendril(b: &mut ::test::Bencher) {
                    let mut t = ::tendril::StrTendril::new();
                    while t.len() < SMALL_SIZE {
                        t.push_slice(::tendril::bench::$txt);
                    }
                    b.iter(|| {
                        ::tendril::bench::index_words_tendril(&t)
                    });
                }

                #[bench]
                fn index_words_big_string(b: &mut ::test::Bencher) {
                    let mut s = String::new();
                    while s.len() < LARGE_SIZE {
                        s.push_str(::tendril::bench::$txt);
                    }
                    b.iter(|| {
                        ::tendril::bench::index_words_string(&s)
                    });
                }

                #[bench]
                fn index_words_big_tendril(b: &mut ::test::Bencher) {
                    let mut t = ::tendril::StrTendril::new();
                    while t.len() < LARGE_SIZE {
                        t.push_slice(::tendril::bench::$txt);
                    }
                    b.iter(|| {
                        ::tendril::bench::index_words_tendril(&t)
                    });
                }

                #[test]
                fn correctness() {
                    use std::borrow::ToOwned;
                    use tendril::SliceExt;
                    use tendril::bench::{index_words_string, index_words_tendril};

                    let txt = ::tendril::bench::$txt;
                    let input_string = txt.to_owned();
                    let count_s = index_words_string(&input_string);
                    let mut keys: Vec<char> = count_s.keys().cloned().collect();
                    keys.sort();

                    let input_tendril = txt.to_tendril();
                    let count_t = index_words_tendril(&input_tendril);
                    let mut keys_t: Vec<char> = count_t.keys().cloned().collect();
                    keys_t.sort();

                    assert_eq!(keys, keys_t);

                    for k in &keys {
                        let vs = &count_s[k];
                        let vt = &count_t[k];
                        assert_eq!(vs.len(), vt.len());
                        assert!(vs.iter().zip(vt.iter()).all(|(s, t)| **s == **t));
                    }
                }
            }
        }
    }

    bench!(EN_1);
    bench!(EN_2);
    bench!(KR_1);
    bench!(HTML_KR_1);
}