/// A wrapper around all the state needed to execute a stream search.
///
/// The `'code` lifetime is the one carried by the contained [`StreamMatcher`].
pub struct ScratchStreamSink<'code> {
/// Cloneable handle to a stateful stream.
pub live: Box<dyn Handle<R = LiveStream>>,
/// Type-erased wrapper over the user-provided match callback.
pub matcher: StreamMatcher<'code>,
/// Cloneable handle to a scratch space initialized for the same db as
/// [`Self::live`].
pub scratch: Box<dyn Handle<R = Scratch>>,
}
Available on crate feature `stream` only.
A wrapper around all the state needed to execute a stream search.
By holding handles to `Self::live` and `Self::scratch`, the stream scanning API can be made quite fluent, without as many parameters per call:
#[cfg(feature = "compiler")]
fn main() -> Result<(), vectorscan::error::VectorscanError> {
    use std::{mem, ops::Range};
    use vectorscan::{expression::*, flags::*, matchers::*, stream::*};

    // Compile the pattern in streaming mode and allocate the per-search state.
    let pattern = "a+".parse::<Expression>()?;
    let database = pattern.compile(Flags::SOM_LEFTMOST, Mode::STREAM | Mode::SOM_HORIZON_LARGE)?;
    let scratch_space = database.allocate_scratch()?;
    let stream = database.allocate_stream()?;

    // `hits` is the output buffer; the callback below borrows it mutably.
    let mut hits: Vec<StreamMatch> = Vec::new();

    // A plain stack-allocated closure (no boxing) that records every match
    // and tells the scanner to keep going.
    let mut on_match = |hit| {
        hits.push(hit);
        MatchResult::Continue
    };

    // From here on, `matcher` carries the mutable borrow of `hits`
    // (through `on_match`) as far as the borrow checker is concerned.
    let matcher = StreamMatcher::new(&mut on_match);
    let mut sink = ScratchStreamSink::new(stream, matcher, scratch_space);
    sink.scan("aardvark".into())?;
    sink.flush_eod()?;

    // Dropping `sink` drops `matcher` with it, leaving `on_match` as the only
    // holder of the borrow on `hits`. Wrapping the sink's usage in a `{}`
    // scope would achieve the same thing.
    mem::drop(sink);

    // `on_match` is never used again, so rustc can prove no mutable reference
    // to `hits` is still live and allows it to be consumed by `.into_iter()`.
    let spans: Vec<Range<usize>> = hits.into_iter().map(|hit| hit.range.into()).collect();
    assert_eq!(&spans, &[0..1, 0..2, 5..6]);
    Ok(())
}
Fields§
§live: Box<dyn Handle<R = LiveStream>>
Cloneable handle to a stateful stream.
matcher: StreamMatcher<'code>
Type-erased wrapper over the user-provided match callback.
scratch: Box<dyn Handle<R = Scratch>>
Cloneable handle to a scratch space initialized for the same db as `Self::live`.
Implementations§
Source§impl<'code> ScratchStreamSink<'code>
impl<'code> ScratchStreamSink<'code>
Sourcepub fn new(
live: impl Handle<R = LiveStream>,
matcher: StreamMatcher<'code>,
scratch: impl Handle<R = Scratch>,
) -> Self
pub fn new( live: impl Handle<R = LiveStream>, matcher: StreamMatcher<'code>, scratch: impl Handle<R = Scratch>, ) -> Self
Collate all the state necessary to match against a stream.
Sourcepub fn scan<'data>(
&mut self,
data: ByteSlice<'data>,
) -> Result<(), VectorscanRuntimeError>
pub fn scan<'data>( &mut self, data: ByteSlice<'data>, ) -> Result<(), VectorscanRuntimeError>
Write a single contiguous string into the automaton.
#[cfg(feature = "compiler")]
fn main() -> Result<(), vectorscan::error::VectorscanError> {
    use std::ops::Range;
    use vectorscan::{expression::*, flags::*, matchers::*, stream::*};

    // Compile the pattern in streaming mode and allocate the per-search state.
    let pattern = "a+".parse::<Expression>()?;
    let database = pattern.compile(Flags::SOM_LEFTMOST, Mode::STREAM | Mode::SOM_HORIZON_LARGE)?;
    let scratch_space = database.allocate_scratch()?;
    let stream = database.allocate_stream()?;

    // `hits` is the output buffer; the callback below borrows it mutably.
    let mut hits: Vec<StreamMatch> = Vec::new();

    // A plain stack-allocated closure (no boxing) that records every match
    // and tells the scanner to keep going.
    let mut on_match = |hit| {
        hits.push(hit);
        MatchResult::Continue
    };

    {
        // Inside this scope, `matcher` carries the mutable borrow of `hits`
        // (through `on_match`) as far as the borrow checker is concerned.
        let matcher = StreamMatcher::new(&mut on_match);
        let mut sink = ScratchStreamSink::new(stream, matcher, scratch_space);

        // Feed the stream in two separate writes.
        sink.scan("aardvarka".into())?;
        sink.scan("a".into())?;
        sink.flush_eod()?;
    }

    // Leaving the scope dropped `matcher`, so `hits` may be consumed again.
    let spans: Vec<Range<usize>> = hits.into_iter().map(|hit| hit.range.into()).collect();

    // The final match, 8..10, straddles the boundary between the two
    // non-contiguous `scan()` inputs.
    assert_eq!(&spans, &[0..1, 0..2, 5..6, 8..9, 8..10]);
    Ok(())
}
Sourcepub fn scan_vectored<'data>(
&mut self,
data: VectoredByteSlices<'data, 'data>,
) -> Result<(), VectorscanRuntimeError>
Available on crate feature vectored
only.
pub fn scan_vectored<'data>( &mut self, data: VectoredByteSlices<'data, 'data>, ) -> Result<(), VectorscanRuntimeError>
Available on crate feature `vectored` only.
Write vectored string data into the automaton.
#[cfg(feature = "compiler")]
fn main() -> Result<(), vectorscan::error::VectorscanError> {
    use std::ops::Range;
    use vectorscan::{expression::*, flags::*, matchers::*, sources::*, stream::*};

    // Compile the pattern in streaming mode and allocate the per-search state.
    let pattern = "a+".parse::<Expression>()?;
    let database = pattern.compile(Flags::SOM_LEFTMOST, Mode::STREAM | Mode::SOM_HORIZON_LARGE)?;
    let scratch_space = database.allocate_scratch()?;
    let stream = database.allocate_stream()?;

    // Two separate buffers that will be written in a single vectored call.
    let chunks: [ByteSlice; 2] = ["aardvarka".into(), "asdf".into()];

    // `hits` is the output buffer; the callback below borrows it mutably.
    let mut hits: Vec<StreamMatch> = Vec::new();

    // A plain stack-allocated closure (no boxing) that records every match
    // and tells the scanner to keep going.
    let mut on_match = |hit| {
        hits.push(hit);
        MatchResult::Continue
    };

    {
        // Inside this scope, `matcher` carries the mutable borrow of `hits`
        // (through `on_match`) as far as the borrow checker is concerned.
        let matcher = StreamMatcher::new(&mut on_match);
        let mut sink = ScratchStreamSink::new(stream, matcher, scratch_space);
        sink.scan_vectored(chunks.as_ref().into())?;
        sink.flush_eod()?;
    }

    // Leaving the scope dropped `matcher`, so `hits` may be consumed again.
    let spans: Vec<Range<usize>> = hits.into_iter().map(|hit| hit.range.into()).collect();

    // The final match, 8..10, straddles the boundary between the two
    // non-contiguous slices of the vectored input.
    assert_eq!(&spans, &[0..1, 0..2, 5..6, 8..9, 8..10]);
    Ok(())
}
Sourcepub fn flush_eod(&mut self) -> Result<(), VectorscanRuntimeError>
pub fn flush_eod(&mut self) -> Result<(), VectorscanRuntimeError>
Trigger any match callbacks that require matching against the end of data (EOD).
`Expression::info()` returns a `MatchAtEndBehavior`, which can be used to determine whether this check is necessary. However, it typically makes sense to execute it exactly once at the end of every stream instead of trying to optimize this away.
Sourcepub fn reset(&mut self) -> Result<(), VectorscanRuntimeError>
pub fn reset(&mut self) -> Result<(), VectorscanRuntimeError>
Reach into `Self::live` and call `LiveStream::reset()`.