pub struct Database(/* private fields */);
Expand description
Read-only description of an in-memory state machine.
This type also serves as the entry point to the various types of pattern compilers, including literals, sets, and literal sets.
Implementations§
Source§impl Database
§Convenience Methods
These methods prepare some resource within a new heap allocation and are
useful for doctests and examples.
impl Database
§Convenience Methods
These methods prepare some resource within a new heap allocation and are useful for doctests and examples.
§Scratch Setup
Databases already require their own heap allocation, which can be managed
with the methods in Managing Allocations. However,
databases also impose a sort of implicit dynamic lifetime constraint on
`Scratch` objects, which must be initialized against a db with
`Scratch::setup_for_db()` before vectorscan can do any searching.
It is encouraged to re-use `Scratch` objects across databases where
possible to minimize unnecessary allocations, but
`Self::allocate_scratch()` is provided as a convenience method to quickly
produce a 1:1 db:scratch mapping.
§Serialization
While `SerializedDb` offers a rich interface to wrap serialized bytes from
a variety of sources with `alloc::DbAllocation`, `Self::serialize()`
simply returns a newly allocated region of bytes.
Sourcepub fn allocate_scratch(&self) -> Result<Scratch, VectorscanRuntimeError>
pub fn allocate_scratch(&self) -> Result<Scratch, VectorscanRuntimeError>
Call `Scratch::setup_for_db()` on a newly allocated `Scratch::blank()`.
#[cfg(feature = "compiler")]
fn main() -> Result<(), vectorscan::error::VectorscanError> {
use vectorscan::{expression::*, flags::*, matchers::*};
let expr: Expression = "a+".parse()?;
let db = expr.compile(Flags::SOM_LEFTMOST, Mode::BLOCK)?;
let mut scratch = db.allocate_scratch()?;
let mut matches: Vec<&str> = Vec::new();
scratch
.scan_sync(&db, "aardvark".into(), |Match { source, .. }| {
matches.push(unsafe { source.as_str() });
MatchResult::Continue
})?;
assert_eq!(&matches, &["a", "aa", "a"]);
Ok(())
}
Sourcepub fn allocate_stream(&self) -> Result<LiveStream, VectorscanRuntimeError>
Available on crate feature `stream` only.
pub fn allocate_stream(&self) -> Result<LiveStream, VectorscanRuntimeError>
Call `LiveStream::open()` on `self`.
#[cfg(feature = "compiler")]
fn main() -> Result<(), vectorscan::error::VectorscanError> {
use vectorscan::{expression::*, flags::*, matchers::*, stream::*};
use std::ops::Range;
let expr: Expression = "a+".parse()?;
let db = expr.compile(Flags::SOM_LEFTMOST, Mode::STREAM | Mode::SOM_HORIZON_SMALL)?;
let scratch = db.allocate_scratch()?;
let live = db.allocate_stream()?;
let data = "aardvark";
let mut matches: Vec<&str> = Vec::new();
let mut match_fn = |StreamMatch { range, .. }| {
let range: Range<usize> = range.into();
matches.push(&data[range]);
MatchResult::Continue
};
{
let matcher = StreamMatcher::new(&mut match_fn);
let mut sink = ScratchStreamSink::new(live, matcher, scratch);
sink.scan(data.into())?;
sink.flush_eod()?;
}
assert_eq!(&matches, &["a", "aa", "a"]);
Ok(())
}
Sourcepub fn serialize(&self) -> Result<SerializedDb<'static>, VectorscanRuntimeError>
pub fn serialize(&self) -> Result<SerializedDb<'static>, VectorscanRuntimeError>
Allocate a new memory region and serialize this in-memory state machine into it.
#[cfg(feature = "compiler")]
fn main() -> Result<(), vectorscan::error::VectorscanError> {
use vectorscan::{expression::*, flags::*, matchers::*};
// Create a db to match against:
let expr: Expression = "a+".parse()?;
let db = expr.compile(Flags::SOM_LEFTMOST, Mode::BLOCK)?;
// Serialize and deserialize the db:
let db = db.serialize()?.deserialize_db()?;
let mut scratch = db.allocate_scratch()?;
// Search against the db:
let mut matches: Vec<&str> = Vec::new();
scratch
.scan_sync(&db, "aardvark".into(), |Match { source, .. }| {
matches.push(unsafe { source.as_str() });
MatchResult::Continue
})?;
assert_eq!(&matches, &["a", "aa", "a"]);
Ok(())
}
Source§impl Database
§Pattern Compilers
Vectorscan supports compiling state machines for PCRE-like and literal
pattern strings, as well as parallel sets of those patterns (although note
that literal and non-literal patterns cannot be mixed). Each compile method
supports a subset of all `Flags` arguments, documented in each method.
impl Database
§Pattern Compilers
Vectorscan supports compiling state machines for PCRE-like and literal
pattern strings, as well as parallel sets of those patterns (although note
that literal and non-literal patterns cannot be mixed). Each compile method
supports a subset of all `Flags` arguments, documented in each method.
§Platform Compatibility
Each method also accepts an optional `Platform` object,
which is used to select processor features to compile the database for.
While the default of `None` will enable all features available to the
current processor, some features can be disabled in order to produce a
database which can execute on a wider variety of target platforms
after being deserialized from a remote source.
#[cfg(all(feature = "compiler", feature = "stream"))]
fn main() -> Result<(), vectorscan::error::VectorscanError> {
use vectorscan::{expression::*, flags::{*, platform::*}, database::*};
use std::slice;
let expr: Expression = "a+".parse()?;
// Verify that the current platform has AVX2 instructions, and make a db:
let plat = Platform::local()?;
assert!(plat.cpu_features.contains(&CpuFeatures::AVX2));
assert!(plat != Platform::GENERIC);
let db_with_avx2 = Database::compile(
&expr,
Flags::default(),
Mode::STREAM,
Some(&plat),
)?;
// The only specialized instructions we have available are AVX2:
assert_eq!(CpuFeatures::NONE, plat.cpu_features & !CpuFeatures::AVX2);
// Avoid using AVX2 instructions:
let db_no_avx2 = Database::compile(
&expr,
Flags::default(),
Mode::STREAM,
Some(&Platform::GENERIC),
)?;
// Instruction selection does not affect the size of the state machine:
assert!(db_with_avx2.database_size()? == db_no_avx2.database_size()?);
assert!(db_with_avx2.stream_size()? == db_no_avx2.stream_size()?);
// Now create a db with None for the platform:
let db_local = Database::compile(&expr, Flags::default(), Mode::STREAM, None)?;
assert!(db_with_avx2.database_size()? == db_local.database_size()?);
let n = db_with_avx2.database_size()?;
// Using None produces the same db as Platform::local():
assert!(db_with_avx2.info()? == db_local.info()?);
assert!(db_no_avx2.info()? != db_local.info()?);
// The "same" db does *not* apply to the in-memory representation:
let db_data_1 = unsafe { slice::from_raw_parts(
db_with_avx2.as_ref_native() as *const NativeDb as *const u8,
n,
)};
let db_data_2 = unsafe { slice::from_raw_parts(
db_no_avx2.as_ref_native() as *const NativeDb as *const u8,
n,
)};
let db_data_3 = unsafe { slice::from_raw_parts(
db_local.as_ref_native() as *const NativeDb as *const u8,
n,
)};
assert!(db_data_1 != db_data_3);
assert!(db_data_1 != db_data_2);
Ok(())
}
§Dynamic Memory Allocation
These methods allocate a new region of memory using the db allocator (which
can be overridden with `crate::alloc::set_db_allocator()`). That
allocation can be manipulated as described in Managing Allocations.
Sourcepub fn compile(
expression: &Expression,
flags: Flags,
mode: Mode,
platform: Option<&Platform>,
) -> Result<Self, VectorscanCompileError>
Available on crate feature `compiler` only.
pub fn compile( expression: &Expression, flags: Flags, mode: Mode, platform: Option<&Platform>, ) -> Result<Self, VectorscanCompileError>
Single pattern compiler.
§Accepted Flags
use vectorscan::{expression::*, flags::*, database::*, matchers::*};
let expr: Expression = "hell(o)?".parse()?;
let db = Database::compile(&expr, Flags::default(), Mode::BLOCK, None)?;
let mut scratch = db.allocate_scratch()?;
let mut matches: Vec<&str> = Vec::new();
scratch
.scan_sync(&db, "hello".into(), |m| {
matches.push(unsafe { m.source.as_str() });
MatchResult::Continue
})?;
assert_eq!(&matches, &["hell", "hello"]);
Sourcepub fn compile_multi(
expression_set: &ExpressionSet<'_>,
mode: Mode,
platform: Option<&Platform>,
) -> Result<Self, VectorscanCompileError>
Available on crate feature `compiler` only.
pub fn compile_multi( expression_set: &ExpressionSet<'_>, mode: Mode, platform: Option<&Platform>, ) -> Result<Self, VectorscanCompileError>
Multiple pattern compiler.
§Accepted Flags
use vectorscan::{expression::*, flags::*, database::*, matchers::*};
let a_expr: Expression = "a+".parse()?;
let b_expr: Expression = "b+".parse()?;
// Example of providing ExprExt info (not available in ::compile()!):
let ext = ExprExt::from_min_length(1);
let expr_set = ExpressionSet::from_exprs([&a_expr, &b_expr])
.with_ids([ExprId(1), ExprId(2)])
.with_exts([None, Some(&ext)]);
let db = Database::compile_multi(&expr_set, Mode::BLOCK, None)?;
let mut scratch = db.allocate_scratch()?;
let mut matches: Vec<&str> = Vec::new();
scratch
.scan_sync(&db, "aardvark".into(), |m| {
matches.push(unsafe { m.source.as_str() });
MatchResult::Continue
})?;
assert_eq!(&matches, &["a", "aa", "aardva"]);
matches.clear();
scratch
.scan_sync(&db, "imbibe".into(), |m| {
matches.push(unsafe { m.source.as_str() });
MatchResult::Continue
})?;
assert_eq!(&matches, &["imb", "imbib"]);
Sourcepub fn compile_literal(
literal: &Literal,
flags: Flags,
mode: Mode,
platform: Option<&Platform>,
) -> Result<Self, VectorscanCompileError>
Available on crate feature `compiler` only.
pub fn compile_literal( literal: &Literal, flags: Flags, mode: Mode, platform: Option<&Platform>, ) -> Result<Self, VectorscanCompileError>
Single literal compiler.
§Accepted Flags
use vectorscan::{expression::*, flags::*, database::*, matchers::*};
let expr: Literal = "he\0ll".parse()?;
let db = Database::compile_literal(&expr, Flags::default(), Mode::BLOCK, None)?;
let mut scratch = db.allocate_scratch()?;
let mut matches: Vec<&str> = Vec::new();
scratch
.scan_sync(&db, "he\0llo".into(), |m| {
matches.push(unsafe { m.source.as_str() });
MatchResult::Continue
})?;
assert_eq!(&matches, &["he\0ll"]);
Sourcepub fn compile_multi_literal(
literal_set: &LiteralSet<'_>,
mode: Mode,
platform: Option<&Platform>,
) -> Result<Self, VectorscanCompileError>
Available on crate feature `compiler` only.
pub fn compile_multi_literal( literal_set: &LiteralSet<'_>, mode: Mode, platform: Option<&Platform>, ) -> Result<Self, VectorscanCompileError>
Multiple literal compiler.
§Accepted Flags
use vectorscan::{expression::*, flags::*, database::*, matchers::*};
let hell_lit: Literal = "he\0ll".parse()?;
let free_lit: Literal = "fr\0e\0e".parse()?;
let lit_set = LiteralSet::from_lits([&hell_lit, &free_lit])
.with_flags([Flags::default(), Flags::default()])
.with_ids([ExprId(2), ExprId(1)]);
let db = Database::compile_multi_literal(&lit_set, Mode::BLOCK, None)?;
let mut scratch = db.allocate_scratch()?;
let mut matches: Vec<(u32, &str)> = Vec::new();
scratch
.scan_sync(
&db,
"he\0llo".into(),
|Match { id: ExpressionIndex(id), source, .. }| {
matches.push((id, unsafe { source.as_str() }));
MatchResult::Continue
})?;
assert_eq!(&matches, &[(2, "he\0ll")]);
matches.clear();
scratch
.scan_sync(
&db,
"fr\0e\0edom".into(),
|Match { id: ExpressionIndex(id), source, .. }| {
matches.push((id, unsafe { source.as_str() }));
MatchResult::Continue
})?;
assert_eq!(&matches, &[(1, "fr\0e\0e")]);
Source§impl Database
§Introspection
These methods extract various bits of runtime information from the db.
impl Database
§Introspection
These methods extract various bits of runtime information from the db.
Sourcepub fn database_size(&self) -> Result<usize, VectorscanRuntimeError>
pub fn database_size(&self) -> Result<usize, VectorscanRuntimeError>
Return the size of the db allocation.
Using `Flags::UCP` explodes the size of character classes, which
increases the size of the state machine:
#[cfg(feature = "compiler")]
fn main() -> Result<(), vectorscan::error::VectorscanError> {
use vectorscan::{expression::*, flags::*};
let expr: Expression = r"\w".parse()?;
let utf8_db = expr.compile(Flags::UTF8 | Flags::UCP, Mode::BLOCK)?;
let ascii_db = expr.compile(Flags::default(), Mode::BLOCK)?;
// Including UTF-8 classes increases the size:
assert!(utf8_db.database_size()? > ascii_db.database_size()?);
Ok(())
}
This size corresponds to the requested allocation size passed to the db allocator:
#[cfg(all(feature = "alloc", feature = "compiler"))]
fn main() -> Result<(), vectorscan::error::VectorscanError> {
use vectorscan::{expression::*, flags::*, alloc::*};
use std::alloc::System;
// Wrap the standard Rust System allocator.
let tracker = LayoutTracker::new(System.into());
// Register it as the allocator for databases.
assert!(set_db_allocator(tracker)?.is_none());
let expr: Expression = r"\w".parse()?;
let utf8_db = expr.compile(Flags::UTF8 | Flags::UCP, Mode::BLOCK)?;
// Get the database allocator we just registered and view its live allocations:
let allocs = get_db_allocator().as_ref().unwrap().current_allocations();
// Verify that only the single known db was allocated:
assert_eq!(1, allocs.len());
let (_p, layout) = allocs[0];
// Verify that the allocation size is the same as reported:
assert_eq!(layout.size(), utf8_db.database_size()?);
Ok(())
}
Sourcepub fn stream_size(&self) -> Result<usize, VectorscanRuntimeError>
Available on crate feature `stream` only.
pub fn stream_size(&self) -> Result<usize, VectorscanRuntimeError>
Return the amount of space necessary to maintain stream state for this db.
#[cfg(feature = "compiler")]
fn main() -> Result<(), vectorscan::error::VectorscanError> {
use vectorscan::{expression::*, flags::*};
let expr: Expression = r"\w".parse()?;
let utf8_db = expr.compile(Flags::UTF8 | Flags::UCP, Mode::STREAM)?;
let ascii_db = expr.compile(Flags::default(), Mode::STREAM)?;
// Including UTF-8 classes increases both db and stream size:
assert!(utf8_db.database_size()? > ascii_db.database_size()?);
assert!(utf8_db.stream_size()? > ascii_db.stream_size()?);
Ok(())
}
This size corresponds to the requested allocation size passed to the stream allocator:
#[cfg(all(feature = "alloc", feature = "compiler"))]
fn main() -> Result<(), vectorscan::error::VectorscanError> {
use vectorscan::{expression::*, flags::*, alloc::*, stream::*};
use std::alloc::System;
// Wrap the standard Rust System allocator.
let tracker = LayoutTracker::new(System.into());
// Register it as the allocator for streams.
assert!(set_stream_allocator(tracker)?.is_none());
let expr: Expression = r"\w".parse()?;
let db = expr.compile(Flags::UTF8 | Flags::UCP, Mode::STREAM)?;
let _stream = LiveStream::open(&db)?;
// Get the stream allocator we just registered and view its live allocations:
let allocs = get_stream_allocator().as_ref().unwrap().current_allocations();
// Verify that only the single known stream was allocated:
assert_eq!(1, allocs.len());
let (_p, layout) = allocs[0];
// Verify that the allocation size is the same as reported:
assert_eq!(layout.size(), db.stream_size()?);
Ok(())
}
Sourcepub fn info(&self) -> Result<DbInfo, VectorscanRuntimeError>
pub fn info(&self) -> Result<DbInfo, VectorscanRuntimeError>
Extract metadata about the current database into a new string allocation.
This is a convenience method that simply calls `DbInfo::extract_db_info()`.
#[cfg(feature = "compiler")]
fn main() -> Result<(), vectorscan::error::VectorscanError> {
use vectorscan::{expression::*, flags::*};
let expr: Expression = "a+".parse()?;
let db = expr.compile(Flags::default(), Mode::BLOCK)?;
let info = db.info()?;
assert_eq!(info.as_str(), "Version: 5.4.11 Features: AVX2 Mode: BLOCK");
Ok(())
}
Source§impl Database
§Managing Allocations
These methods provide access to the underlying memory allocation containing
the data for the in-memory state machine. They can be used along with
`SerializedDb::deserialize_db_at()` to control the memory location used
for the state machine, or to preserve db allocations across weird lifetime
constraints.
impl Database
§Managing Allocations
These methods provide access to the underlying memory allocation containing
the data for the in-memory state machine. They can be used along with
`SerializedDb::deserialize_db_at()` to control the memory location used
for the state machine, or to preserve db allocations across weird lifetime
constraints.
Note that `Self::database_size()` can be used to determine the size of the
memory allocation pointed to by `Self::as_ref_native()` and
`Self::as_mut_native()`.
Sourcepub const unsafe fn from_native(p: *mut NativeDb) -> Self
pub const unsafe fn from_native(p: *mut NativeDb) -> Self
Wrap the provided allocation `p`.
§Safety
The pointer `p` must point to an initialized db allocation prepared by one
of the compile or deserialize methods.
This method also makes it especially easy to create multiple references to
the same allocation, which will then cause a double free when
`Self::try_drop()` is called more than once for the same db allocation.
To avoid this, wrap the result in a `ManuallyDrop`:
#[cfg(feature = "compiler")]
fn main() -> Result<(), vectorscan::error::VectorscanError> {
use vectorscan::{expression::*, flags::*, matchers::*, database::*, state::*};
use std::mem::ManuallyDrop;
// Compile a legitimate db:
let expr: Expression = "a+".parse()?;
let mut db = expr.compile(Flags::SOM_LEFTMOST, Mode::BLOCK)?;
// Create two new references to that allocation,
// wrapped to avoid calling the drop code:
let db_ptr: *mut NativeDb = db.as_mut_native();
let db_ref_1 = ManuallyDrop::new(unsafe { Database::from_native(db_ptr) });
let db_ref_2 = ManuallyDrop::new(unsafe { Database::from_native(db_ptr) });
// Both db references are valid and can be used for matching.
let mut scratch = Scratch::blank();
scratch.setup_for_db(&db_ref_1)?;
scratch.setup_for_db(&db_ref_2)?;
let mut matches: Vec<&str> = Vec::new();
scratch
.scan_sync(&db_ref_1, "aardvark".into(), |Match { source, .. }| {
matches.push(unsafe { source.as_str() });
MatchResult::Continue
})?;
scratch
.scan_sync(&db_ref_2, "aardvark".into(), |Match { source, .. }| {
matches.push(unsafe { source.as_str() });
MatchResult::Continue
})?;
assert_eq!(&matches, &["a", "aa", "a", "a", "aa", "a"]);
Ok(())
}
Sourcepub fn as_ref_native(&self) -> &NativeDb
pub fn as_ref_native(&self) -> &NativeDb
Get a read-only reference to the db allocation.
This method is mostly used internally and cast to a pointer to provide to the vectorscan native library methods.
Sourcepub fn as_mut_native(&mut self) -> &mut NativeDb
pub fn as_mut_native(&mut self) -> &mut NativeDb
Get a mutable reference to the db allocation.
The result of this method can be cast to a pointer and provided to
`Self::from_native()`.
Sourcepub unsafe fn try_drop(&mut self) -> Result<(), VectorscanRuntimeError>
pub unsafe fn try_drop(&mut self) -> Result<(), VectorscanRuntimeError>
Free the underlying db allocation.
§Safety
This method must be called at most once over the lifetime of each db
allocation. It is called by default on drop, so `ManuallyDrop` is
recommended to wrap instances that reference external data in order to
avoid attempting to free the referenced data.
§Only Frees Memory
This method performs no processing other than freeing the allocated
memory, so it can be skipped without leaking resources if the
underlying `NativeDb` allocation is freed by some other means.