extern crate utf8_width;
#[macro_use]
extern crate bencher;
use std::fs;
use bencher::Bencher;
/// Path of the text file the benchmarks load, resolved against the current
/// working directory.
///
/// Every target other than Windows uses the forward-slash spelling, so the
/// constant is defined on all platforms rather than only on `cfg(unix)`.
#[cfg(not(windows))]
const TEXT_PATH: &str = "benches/data/wikipedia-rust.txt";
/// Backslash-separated spelling of the benchmark text path used on Windows.
#[cfg(windows)]
const TEXT_PATH: &str = r"benches\data\wikipedia-rust.txt";
/// Width lookup table indexed by a byte value.
///
/// Each entry is the number of bytes the benchmark advances by when that byte
/// is found under the cursor: 1 for 0x00..=0x7F, 2 for 0xC2..=0xDF, 3 for
/// 0xE0..=0xEF, 4 for 0xF0..=0xF4, and 0 for every other value. Rows hold 16
/// entries each; the trailing comment names the high nibble of the row.
static UTF8_CHAR_WIDTH: [usize; 256] = [
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0_
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1_
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x2_
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x3_
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x4_
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x5_
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x6_
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x7_
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x8_
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x9_
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA_
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB_
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC_
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xD_
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE_
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF_
];
/// Benchmarks walking the fixture text with `utf8_width::get_width`.
///
/// Every timed iteration starts at offset 0, asks `get_width` for the width
/// of the byte under the cursor, records it, and advances the cursor by that
/// width until it reaches the end of the buffer. The collected widths are
/// returned from the closure, and the buffer length is reported through
/// `bencher.bytes`.
///
/// # Panics
///
/// Panics during setup (before anything is timed) if the fixture cannot be
/// read, is not valid UTF-8, or is empty.
fn retrieve_get_width(bencher: &mut Bencher) {
    // Load via `read_to_string` so the bytes are known to be valid UTF-8.
    // The loop below only stops when the cursor lands exactly on `length`;
    // on malformed input it could index past the end or never terminate.
    // This relies on `get_width` reporting the true width of each leading
    // byte -- confirm against the crate documentation.
    let bytes = fs::read_to_string(TEXT_PATH)
        .expect("failed to read the benchmark text as UTF-8")
        .into_bytes();
    let length = bytes.len();
    // The loop reads `bytes[0]` unconditionally, so reject an empty fixture
    // up front with a readable message.
    assert!(length > 0, "{} is empty; nothing to benchmark", TEXT_PATH);
    bencher.iter(|| {
        let mut widths = Vec::new();
        let mut p = 0;
        loop {
            let e = bytes[p];
            let width = utf8_width::get_width(e);
            widths.push(width);
            p += width;
            if p == length {
                break;
            }
        }
        widths
    });
    bencher.bytes = length as u64;
}
/// Benchmarks walking the fixture text with
/// `utf8_width::get_width_assume_valid`.
///
/// Every timed iteration starts at offset 0, asks the function for the width
/// of the byte under the cursor, records it, and advances the cursor by that
/// width until it reaches the end of the buffer. The collected widths are
/// returned from the closure, and the buffer length is reported through
/// `bencher.bytes`.
///
/// # Panics
///
/// Panics during setup (before anything is timed) if the fixture cannot be
/// read, is not valid UTF-8, or is empty.
fn retrieve_get_width_assume_valid(bencher: &mut Bencher) {
    // Load via `read_to_string` so the bytes are known to be valid UTF-8
    // before they are handed to the `unsafe` call below.
    let bytes = fs::read_to_string(TEXT_PATH)
        .expect("failed to read the benchmark text as UTF-8")
        .into_bytes();
    let length = bytes.len();
    // The loop reads `bytes[0]` unconditionally, so reject an empty fixture
    // up front with a readable message.
    assert!(length > 0, "{} is empty; nothing to benchmark", TEXT_PATH);
    bencher.iter(|| {
        let mut widths = Vec::new();
        let mut p = 0;
        loop {
            let e = bytes[p];
            // SAFETY: `bytes` came out of a `String`, so it is valid UTF-8,
            // and the cursor starts on a character boundary and moves by the
            // reported width each step. Presumably the function's contract is
            // that `e` is a valid leading byte -- confirm against the
            // `utf8_width` documentation.
            let width = unsafe { utf8_width::get_width_assume_valid(e) };
            widths.push(width);
            p += width;
            if p == length {
                break;
            }
        }
        widths
    });
    bencher.bytes = length as u64;
}
/// Benchmarks walking the fixture text with the local `UTF8_CHAR_WIDTH`
/// lookup table.
///
/// Every timed iteration starts at offset 0, looks up the table entry for the
/// byte under the cursor, records it, and advances the cursor by that amount
/// until it reaches the end of the buffer. The collected widths are returned
/// from the closure, and the buffer length is reported through
/// `bencher.bytes`.
///
/// # Panics
///
/// Panics during setup (before anything is timed) if the fixture cannot be
/// read, is not valid UTF-8, or is empty.
fn retrieve_get_width_by_looking_table(bencher: &mut Bencher) {
    // Load via `read_to_string` so the bytes are known to be valid UTF-8.
    // The table holds 0 for some byte values; if such a byte were ever under
    // the cursor, `p` would stop advancing and the loop would never finish.
    let bytes = fs::read_to_string(TEXT_PATH)
        .expect("failed to read the benchmark text as UTF-8")
        .into_bytes();
    let length = bytes.len();
    // The loop reads `bytes[0]` unconditionally, so reject an empty fixture
    // up front with a readable message.
    assert!(length > 0, "{} is empty; nothing to benchmark", TEXT_PATH);
    bencher.iter(|| {
        let mut widths = Vec::new();
        let mut p = 0;
        loop {
            let e = bytes[p];
            let width = UTF8_CHAR_WIDTH[e as usize];
            widths.push(width);
            p += width;
            if p == length {
                break;
            }
        }
        widths
    });
    bencher.bytes = length as u64;
}
/// Benchmarks collecting the UTF-8 width of every character using
/// `str::chars` and `char::len_utf8`.
///
/// The fixture is read once as a `String`; each timed iteration pushes the
/// encoded length of every character into a fresh vector and returns it. The
/// string's byte length is reported through `bencher.bytes`.
fn retrieve_get_width_by_chars(bencher: &mut Bencher) {
    let content = fs::read_to_string(TEXT_PATH).unwrap();
    let total_bytes = content.len();
    bencher.iter(|| {
        let mut char_widths = Vec::new();
        content
            .chars()
            .for_each(|ch| char_widths.push(ch.len_utf8()));
        char_widths
    });
    bencher.bytes = total_bytes as u64;
}
// Collect the four benchmark functions into the `get_width` group and hand
// that group to `benchmark_main!`.
benchmark_group!(
    get_width,
    retrieve_get_width,
    retrieve_get_width_assume_valid,
    retrieve_get_width_by_looking_table,
    retrieve_get_width_by_chars
);
benchmark_main!(get_width);