#![cfg_attr(feature = "pattern", feature(pattern))]
#![cfg_attr(feature = "benchmarks", feature(test))]
#[cfg(test)]
#[macro_use]
extern crate lazy_static;
#[cfg(test)]
extern crate memmap;
#[cfg(test)]
extern crate proptest;
#[cfg(test)]
extern crate region;
use std::marker::PhantomData;
include!(concat!(env!("OUT_DIR"), "/src/macros.rs"));
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
mod simd;
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
mod fallback;
#[cfg(feature = "pattern")]
mod pattern;
// Picks between the SIMD and the fallback implementation of an operation,
// based on the `jetscii_sse4_2` cfg value:
//
// - "yes":   only the `$simd` expression is compiled in;
// - "no":    only the `$fallback` expression is compiled in;
// - "maybe": both are compiled in and SSE4.2 support is probed at run time
//            with `is_x86_feature_detected!`.
//
// NOTE(review): the cfg value is presumably emitted by the build script, with
// exactly one of the three values set per build — confirm.
//
// Both arms must be written with a trailing comma, exactly as in the matcher
// below. Every invocation in this file is the tail statement of a function
// body, so the block that survives cfg-stripping yields the return value.
macro_rules! dispatch {
(simd: $simd:expr,fallback: $fallback:expr,) => {
// SSE4.2 is unconditionally available: use the SIMD path directly.
#[cfg(jetscii_sse4_2 = "yes")]
{
$simd
}
// SSE4.2 is unavailable: use the fallback path directly.
#[cfg(jetscii_sse4_2 = "no")]
{
$fallback
}
// Unknown at compile time: decide per call via run-time feature detection.
#[cfg(jetscii_sse4_2 = "maybe")]
{
if is_x86_feature_detected!("sse4.2") {
$simd
} else {
$fallback
}
}
};
}
/// Searcher over byte slices, built from an array of up to 16 bytes plus a
/// fallback predicate `F`.
///
/// Which backends are stored depends on the `jetscii_sse4_2` cfg value: a SIMD
/// searcher ("yes"/"maybe"), a predicate-driven fallback searcher
/// ("maybe"/"no"), or both ("maybe").
///
/// NOTE(review): presumably the searcher looks for the first byte that belongs
/// to the configured set — confirm against the backend modules.
pub struct Bytes<F: Fn(u8) -> bool> {
    /// SIMD backend; present when SSE4.2 is known or possibly available.
    #[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
    simd: simd::Bytes,

    /// Fallback backend wrapping the predicate; present when SSE4.2 may be
    /// missing.
    #[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
    fallback: fallback::Bytes<F>,

    // Keeps the type parameter `F` in use even in configurations where the
    // `fallback` field is compiled out.
    _fallback: PhantomData<F>,
}
impl<F> Bytes<F>
where
F: Fn(u8) -> bool,
{
#[allow(unused_variables)]
pub fn new(bytes: [u8; 16], len: i32, fallback: F) -> Self {
Bytes {
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
simd: simd::Bytes::new(bytes, len),
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
fallback: fallback::Bytes::new(fallback),
_fallback: PhantomData,
}
}
#[inline]
pub fn find(&self, haystack: &[u8]) -> Option<usize> {
dispatch! {
simd: unsafe { self.simd.find(haystack) },
fallback: self.fallback.find(haystack),
}
}
}
/// A `Bytes` searcher whose fallback predicate is a plain `fn(u8) -> bool`
/// function pointer, giving the searcher a nameable concrete type.
pub type BytesConst = Bytes<fn(u8) -> bool>;
/// Searcher over `&str` haystacks; a thin wrapper around a `Bytes` searcher
/// whose constructor only accepts ASCII bytes (values below 128).
pub struct AsciiChars<F: Fn(u8) -> bool>(Bytes<F>);
impl<F> AsciiChars<F>
where
F: Fn(u8) -> bool,
{
pub fn new(chars: [u8; 16], len: i32, fallback: F) -> Self {
for &b in &chars {
assert!(b < 128, "Cannot have non-ASCII bytes");
}
AsciiChars(Bytes::new(chars, len, fallback))
}
#[inline]
pub fn find(&self, haystack: &str) -> Option<usize> {
self.0.find(haystack.as_bytes())
}
}
/// An `AsciiChars` searcher whose fallback predicate is a plain
/// `fn(u8) -> bool` function pointer. The benchmarks in this file use it as
/// the type of the statics initialised with `ascii_chars!`.
pub type AsciiCharsConst = AsciiChars<fn(u8) -> bool>;
/// Searcher for a byte-slice needle that is borrowed for `'a`.
///
/// Which backends are stored depends on the `jetscii_sse4_2` cfg value: the
/// SIMD one ("yes"/"maybe"), the fallback one ("maybe"/"no"), or both
/// ("maybe").
pub struct ByteSubstring<'a> {
// SIMD backend; present when SSE4.2 is known or possibly available.
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
simd: simd::ByteSubstring<'a>,
// Fallback backend; present when SSE4.2 may be missing.
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
fallback: fallback::ByteSubstring<'a>,
}
impl<'a> ByteSubstring<'a> {
pub fn new(needle: &'a [u8]) -> Self {
ByteSubstring {
#[cfg(any(jetscii_sse4_2 = "yes", jetscii_sse4_2 = "maybe"))]
simd: simd::ByteSubstring::new(needle),
#[cfg(any(jetscii_sse4_2 = "maybe", jetscii_sse4_2 = "no"))]
fallback: fallback::ByteSubstring::new(needle),
}
}
#[cfg(feature = "pattern")]
fn needle_len(&self) -> usize {
dispatch! {
simd: self.simd.needle_len(),
fallback: self.fallback.needle_len(),
}
}
#[inline]
pub fn find(&self, haystack: &[u8]) -> Option<usize> {
dispatch! {
simd: unsafe { self.simd.find(haystack) },
fallback: self.fallback.find(haystack),
}
}
}
/// A `ByteSubstring` searcher whose needle is borrowed for `'static`.
pub type ByteSubstringConst = ByteSubstring<'static>;
/// Searcher for a `&str` needle borrowed for `'a`; a thin wrapper around a
/// `ByteSubstring` operating on the needle's bytes.
pub struct Substring<'a>(ByteSubstring<'a>);
impl<'a> Substring<'a> {
pub fn new(needle: &'a str) -> Self {
Substring(ByteSubstring::new(needle.as_bytes()))
}
#[cfg(feature = "pattern")]
fn needle_len(&self) -> usize {
self.0.needle_len()
}
#[inline]
pub fn find(&self, haystack: &str) -> Option<usize> {
self.0.find(haystack.as_bytes())
}
}
/// A `Substring` searcher whose needle is borrowed for `'static`.
pub type SubstringConst = Substring<'static>;
/// Throughput benchmarks comparing this crate's searchers with equivalent
/// standard-library searches.
///
/// Only built for tests with the `benchmarks` feature, which also turns on the
/// unstable `test` crate feature at the crate root.
#[cfg(all(test, feature = "benchmarks"))]
mod bench {
    extern crate test;

    use super::*;

    lazy_static! {
        static ref SPACE: AsciiCharsConst = ascii_chars!(' ');
        static ref XML_DELIM_3: AsciiCharsConst = ascii_chars!('<', '>', '&');
        static ref XML_DELIM_5: AsciiCharsConst = ascii_chars!('<', '>', '&', '\'', '"');
    }

    /// 5 MiB of `'a'`: filler that contains none of the needles searched for
    /// by the benchmarks below.
    fn prefix_string() -> String {
        "a".repeat(5 * 1024 * 1024)
    }

    /// Runs `f` repeatedly over the filler prefix followed by `suffix` and
    /// records the haystack length so results are reported as throughput.
    fn bench_with_suffix<F>(b: &mut test::Bencher, suffix: &str, f: F)
    where
        F: Fn(&str) -> Option<usize>,
    {
        let mut haystack = prefix_string();
        haystack.push_str(suffix);

        // `black_box` keeps the optimizer from discarding the search result.
        b.iter(|| test::black_box(f(&haystack)));
        b.bytes = haystack.len() as u64;
    }

    /// Benchmarks `f` on a haystack whose only space is its last byte.
    fn bench_space<F>(b: &mut test::Bencher, f: F)
    where
        F: Fn(&str) -> Option<usize>,
    {
        bench_with_suffix(b, " ", f)
    }

    #[bench]
    fn space_ascii_chars(b: &mut test::Bencher) {
        bench_space(b, |hs| SPACE.find(hs))
    }

    #[bench]
    fn space_stdlib_find_string(b: &mut test::Bencher) {
        bench_space(b, |hs| hs.find(" "))
    }

    #[bench]
    fn space_stdlib_find_char(b: &mut test::Bencher) {
        bench_space(b, |hs| hs.find(' '))
    }

    #[bench]
    fn space_stdlib_find_char_set(b: &mut test::Bencher) {
        bench_space(b, |hs| hs.find(&[' '][..]))
    }

    #[bench]
    fn space_stdlib_find_closure(b: &mut test::Bencher) {
        bench_space(b, |hs| hs.find(|c| c == ' '))
    }

    #[bench]
    fn space_stdlib_iterator_position(b: &mut test::Bencher) {
        bench_space(b, |hs| hs.as_bytes().iter().position(|&v| v == b' '))
    }

    /// Benchmarks `f` on a haystack whose only delimiter is a trailing `'&'`.
    fn bench_xml_delim_3<F>(b: &mut test::Bencher, f: F)
    where
        F: Fn(&str) -> Option<usize>,
    {
        bench_with_suffix(b, "&", f)
    }

    #[bench]
    fn xml_delim_3_ascii_chars(b: &mut test::Bencher) {
        bench_xml_delim_3(b, |hs| XML_DELIM_3.find(hs))
    }

    #[bench]
    fn xml_delim_3_stdlib_find_char_set(b: &mut test::Bencher) {
        bench_xml_delim_3(b, |hs| hs.find(&['<', '>', '&'][..]))
    }

    #[bench]
    fn xml_delim_3_stdlib_find_char_closure(b: &mut test::Bencher) {
        bench_xml_delim_3(b, |hs| hs.find(|c| c == '<' || c == '>' || c == '&'))
    }

    #[bench]
    fn xml_delim_3_stdlib_iterator_position(b: &mut test::Bencher) {
        bench_xml_delim_3(b, |hs| {
            hs.as_bytes()
                .iter()
                .position(|&c| c == b'<' || c == b'>' || c == b'&')
        })
    }

    /// Benchmarks `f` on a haystack whose only delimiter is a trailing `'"'`.
    fn bench_xml_delim_5<F>(b: &mut test::Bencher, f: F)
    where
        F: Fn(&str) -> Option<usize>,
    {
        bench_with_suffix(b, "\"", f)
    }

    #[bench]
    fn xml_delim_5_ascii_chars(b: &mut test::Bencher) {
        bench_xml_delim_5(b, |hs| XML_DELIM_5.find(hs))
    }

    #[bench]
    fn xml_delim_5_stdlib_find_char_set(b: &mut test::Bencher) {
        bench_xml_delim_5(b, |hs| hs.find(&['<', '>', '&', '\'', '"'][..]))
    }

    #[bench]
    fn xml_delim_5_stdlib_find_char_closure(b: &mut test::Bencher) {
        bench_xml_delim_5(b, |hs| {
            hs.find(|c| c == '<' || c == '>' || c == '&' || c == '\'' || c == '"')
        })
    }

    #[bench]
    fn xml_delim_5_stdlib_iterator_position(b: &mut test::Bencher) {
        // Uses the five-delimiter haystack so that this benchmark measures the
        // same input as the other `xml_delim_5_*` benchmarks.
        bench_xml_delim_5(b, |hs| {
            hs.as_bytes()
                .iter()
                .position(|&c| c == b'<' || c == b'>' || c == b'&' || c == b'\'' || c == b'"')
        })
    }

    lazy_static! {
        static ref XYZZY: SubstringConst = Substring::new("xyzzy");
    }

    /// Benchmarks `f` on a haystack that ends with the needle `"xyzzy"`.
    fn bench_substring<F>(b: &mut test::Bencher, f: F)
    where
        F: Fn(&str) -> Option<usize>,
    {
        bench_with_suffix(b, "xyzzy", f)
    }

    #[bench]
    fn substring_with_created_searcher(b: &mut test::Bencher) {
        bench_substring(b, |hs| XYZZY.find(hs))
    }

    #[bench]
    fn substring_stdlib_find(b: &mut test::Bencher) {
        bench_substring(b, |hs| hs.find("xyzzy"))
    }
}