use std::fmt;
use std::io;
use std::iter::{self, FromIterator};
use crate::automaton::{AlwaysMatch, Automaton};
use crate::raw;
use crate::stream::{IntoStreamer, Streamer};
use crate::Result;
#[derive(Clone)]
pub struct Set<D>(raw::Fst<D>);
impl Set<Vec<u8>> {
pub fn from_iter<T, I>(iter: I) -> Result<Set<Vec<u8>>>
where
T: AsRef<[u8]>,
I: IntoIterator<Item = T>,
{
let mut builder = SetBuilder::memory();
builder.extend_iter(iter)?;
Set::new(builder.into_inner()?)
}
}
impl<D: AsRef<[u8]>> Set<D> {
pub fn new(data: D) -> Result<Set<D>> {
raw::Fst::new(data).map(Set)
}
pub fn contains<K: AsRef<[u8]>>(&self, key: K) -> bool {
self.0.contains_key(key)
}
#[inline]
pub fn stream(&self) -> Stream<'_> {
Stream(self.0.stream())
}
#[inline]
pub fn range(&self) -> StreamBuilder<'_> {
StreamBuilder(self.0.range())
}
pub fn search<A: Automaton>(&self, aut: A) -> StreamBuilder<'_, A> {
StreamBuilder(self.0.search(aut))
}
#[cfg_attr(
feature = "levenshtein",
doc = r##"
# Example
An implementation of fuzzy search using Levenshtein automata can be used
to search sets:
```rust
use fst::automaton::Levenshtein;
use fst::{IntoStreamer, Streamer, Set};
# fn main() { example().unwrap(); }
fn example() -> Result<(), Box<dyn std::error::Error>> {
let set = Set::from_iter(vec![
"foo",
"foob",
"foobar",
"fozb",
]).unwrap();
let query = Levenshtein::new("foo", 2)?;
let mut stream = set.search_with_state(&query).into_stream();
let mut vs = vec![];
while let Some((v, s)) = stream.next() {
vs.push((String::from_utf8(v.to_vec())?, s));
}
// Currently, there isn't much interesting that you can do with the states.
assert_eq!(vs, vec![
("foo".to_string(), Some(183)),
("foob".to_string(), Some(123)),
("fozb".to_string(), Some(83)),
]);
Ok(())
}
```
"##
)]
pub fn search_with_state<A: Automaton>(
&self,
aut: A,
) -> StreamWithStateBuilder<'_, A> {
StreamWithStateBuilder(self.0.search_with_state(aut))
}
#[inline]
pub fn len(&self) -> usize {
self.0.len()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
#[inline]
pub fn op(&self) -> OpBuilder<'_> {
OpBuilder::new().add(self)
}
pub fn is_disjoint<'f, I, S>(&self, stream: I) -> bool
where
I: for<'a> IntoStreamer<'a, Into = S, Item = &'a [u8]>,
S: 'f + for<'a> Streamer<'a, Item = &'a [u8]>,
{
self.0.is_disjoint(StreamZeroOutput(stream.into_stream()))
}
pub fn is_subset<'f, I, S>(&self, stream: I) -> bool
where
I: for<'a> IntoStreamer<'a, Into = S, Item = &'a [u8]>,
S: 'f + for<'a> Streamer<'a, Item = &'a [u8]>,
{
self.0.is_subset(StreamZeroOutput(stream.into_stream()))
}
pub fn is_superset<'f, I, S>(&self, stream: I) -> bool
where
I: for<'a> IntoStreamer<'a, Into = S, Item = &'a [u8]>,
S: 'f + for<'a> Streamer<'a, Item = &'a [u8]>,
{
self.0.is_superset(StreamZeroOutput(stream.into_stream()))
}
#[inline]
pub fn as_fst(&self) -> &raw::Fst<D> {
&self.0
}
#[inline]
pub fn into_fst(self) -> raw::Fst<D> {
self.0
}
#[inline]
pub fn map_data<F, T>(self, f: F) -> Result<Set<T>>
where
F: FnMut(D) -> T,
T: AsRef<[u8]>,
{
self.into_fst().map_data(f).map(Set::from)
}
}
impl Default for Set<Vec<u8>> {
#[inline]
fn default() -> Set<Vec<u8>> {
Set::from_iter(iter::empty::<&[u8]>()).unwrap()
}
}
impl<D: AsRef<[u8]>> fmt::Debug for Set<D> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Set([")?;
let mut stream = self.stream();
let mut first = true;
while let Some(key) = stream.next() {
if !first {
write!(f, ", ")?;
}
first = false;
write!(f, "{}", String::from_utf8_lossy(key))?;
}
write!(f, "])")
}
}
impl<D: AsRef<[u8]>> AsRef<raw::Fst<D>> for Set<D> {
#[inline]
fn as_ref(&self) -> &raw::Fst<D> {
&self.0
}
}
impl<'s, 'a, D: AsRef<[u8]>> IntoStreamer<'a> for &'s Set<D> {
type Item = &'a [u8];
type Into = Stream<'s>;
#[inline]
fn into_stream(self) -> Stream<'s> {
Stream(self.0.stream())
}
}
impl<D: AsRef<[u8]>> From<raw::Fst<D>> for Set<D> {
#[inline]
fn from(fst: raw::Fst<D>) -> Set<D> {
Set(fst)
}
}
pub struct SetBuilder<W>(raw::Builder<W>);
impl SetBuilder<Vec<u8>> {
#[inline]
pub fn memory() -> SetBuilder<Vec<u8>> {
SetBuilder(raw::Builder::memory())
}
#[inline]
pub fn into_set(self) -> Set<Vec<u8>> {
Set(self.0.into_fst())
}
}
impl<W: io::Write> SetBuilder<W> {
pub fn new(wtr: W) -> Result<SetBuilder<W>> {
raw::Builder::new_type(wtr, 0).map(SetBuilder)
}
pub fn insert<K: AsRef<[u8]>>(&mut self, key: K) -> Result<()> {
self.0.add(key)
}
pub fn extend_iter<T, I>(&mut self, iter: I) -> Result<()>
where
T: AsRef<[u8]>,
I: IntoIterator<Item = T>,
{
for key in iter {
self.0.add(key)?;
}
Ok(())
}
pub fn extend_stream<'f, I, S>(&mut self, stream: I) -> Result<()>
where
I: for<'a> IntoStreamer<'a, Into = S, Item = &'a [u8]>,
S: 'f + for<'a> Streamer<'a, Item = &'a [u8]>,
{
self.0.extend_stream(StreamZeroOutput(stream.into_stream()))
}
pub fn finish(self) -> Result<()> {
self.0.finish()
}
pub fn into_inner(self) -> Result<W> {
self.0.into_inner()
}
pub fn get_ref(&self) -> &W {
self.0.get_ref()
}
pub fn bytes_written(&self) -> u64 {
self.0.bytes_written()
}
}
pub struct Stream<'s, A = AlwaysMatch>(raw::Stream<'s, A>)
where
A: Automaton;
impl<'s, A: Automaton> Stream<'s, A> {
pub fn into_strs(self) -> Result<Vec<String>> {
self.0.into_str_keys()
}
pub fn into_bytes(self) -> Vec<Vec<u8>> {
self.0.into_byte_keys()
}
}
impl<'a, 's, A: Automaton> Streamer<'a> for Stream<'s, A> {
type Item = &'a [u8];
fn next(&'a mut self) -> Option<&'a [u8]> {
self.0.next().map(|(key, _)| key)
}
}
pub struct StreamWithState<'m, A = AlwaysMatch>(raw::StreamWithState<'m, A>)
where
A: Automaton;
impl<'a, 'm, A: 'a + Automaton> Streamer<'a> for StreamWithState<'m, A>
where
A::State: Clone,
{
type Item = (&'a [u8], A::State);
fn next(&'a mut self) -> Option<(&'a [u8], A::State)> {
self.0.next().map(|(key, _, state)| (key, state))
}
}
pub struct StreamBuilder<'s, A = AlwaysMatch>(raw::StreamBuilder<'s, A>);
impl<'s, A: Automaton> StreamBuilder<'s, A> {
pub fn ge<T: AsRef<[u8]>>(self, bound: T) -> StreamBuilder<'s, A> {
StreamBuilder(self.0.ge(bound))
}
pub fn gt<T: AsRef<[u8]>>(self, bound: T) -> StreamBuilder<'s, A> {
StreamBuilder(self.0.gt(bound))
}
pub fn le<T: AsRef<[u8]>>(self, bound: T) -> StreamBuilder<'s, A> {
StreamBuilder(self.0.le(bound))
}
pub fn lt<T: AsRef<[u8]>>(self, bound: T) -> StreamBuilder<'s, A> {
StreamBuilder(self.0.lt(bound))
}
}
impl<'s, 'a, A: Automaton> IntoStreamer<'a> for StreamBuilder<'s, A> {
type Item = &'a [u8];
type Into = Stream<'s, A>;
fn into_stream(self) -> Stream<'s, A> {
Stream(self.0.into_stream())
}
}
pub struct StreamWithStateBuilder<'s, A = AlwaysMatch>(
raw::StreamWithStateBuilder<'s, A>,
);
impl<'s, A: Automaton> StreamWithStateBuilder<'s, A> {
pub fn ge<T: AsRef<[u8]>>(
self,
bound: T,
) -> StreamWithStateBuilder<'s, A> {
StreamWithStateBuilder(self.0.ge(bound))
}
pub fn gt<T: AsRef<[u8]>>(
self,
bound: T,
) -> StreamWithStateBuilder<'s, A> {
StreamWithStateBuilder(self.0.gt(bound))
}
pub fn le<T: AsRef<[u8]>>(
self,
bound: T,
) -> StreamWithStateBuilder<'s, A> {
StreamWithStateBuilder(self.0.le(bound))
}
pub fn lt<T: AsRef<[u8]>>(
self,
bound: T,
) -> StreamWithStateBuilder<'s, A> {
StreamWithStateBuilder(self.0.lt(bound))
}
}
impl<'s, 'a, A: 'a + Automaton> IntoStreamer<'a>
for StreamWithStateBuilder<'s, A>
where
A::State: Clone,
{
type Item = (&'a [u8], A::State);
type Into = StreamWithState<'s, A>;
fn into_stream(self) -> StreamWithState<'s, A> {
StreamWithState(self.0.into_stream())
}
}
pub struct OpBuilder<'s>(raw::OpBuilder<'s>);
impl<'s> OpBuilder<'s> {
#[inline]
pub fn new() -> OpBuilder<'s> {
OpBuilder(raw::OpBuilder::new())
}
pub fn add<I, S>(mut self, streamable: I) -> OpBuilder<'s>
where
I: for<'a> IntoStreamer<'a, Into = S, Item = &'a [u8]>,
S: 's + for<'a> Streamer<'a, Item = &'a [u8]>,
{
self.push(streamable);
self
}
pub fn push<I, S>(&mut self, streamable: I)
where
I: for<'a> IntoStreamer<'a, Into = S, Item = &'a [u8]>,
S: 's + for<'a> Streamer<'a, Item = &'a [u8]>,
{
self.0.push(StreamZeroOutput(streamable.into_stream()));
}
#[inline]
pub fn union(self) -> Union<'s> {
Union(self.0.union())
}
#[inline]
pub fn intersection(self) -> Intersection<'s> {
Intersection(self.0.intersection())
}
#[inline]
pub fn difference(self) -> Difference<'s> {
Difference(self.0.difference())
}
#[inline]
pub fn symmetric_difference(self) -> SymmetricDifference<'s> {
SymmetricDifference(self.0.symmetric_difference())
}
}
impl<'f, I, S> Extend<I> for OpBuilder<'f>
where
I: for<'a> IntoStreamer<'a, Into = S, Item = &'a [u8]>,
S: 'f + for<'a> Streamer<'a, Item = &'a [u8]>,
{
fn extend<T>(&mut self, it: T)
where
T: IntoIterator<Item = I>,
{
for stream in it {
self.push(stream);
}
}
}
impl<'f, I, S> FromIterator<I> for OpBuilder<'f>
where
I: for<'a> IntoStreamer<'a, Into = S, Item = &'a [u8]>,
S: 'f + for<'a> Streamer<'a, Item = &'a [u8]>,
{
fn from_iter<T>(it: T) -> OpBuilder<'f>
where
T: IntoIterator<Item = I>,
{
let mut op = OpBuilder::new();
op.extend(it);
op
}
}
pub struct Union<'s>(raw::Union<'s>);
impl<'a, 's> Streamer<'a> for Union<'s> {
type Item = &'a [u8];
#[inline]
fn next(&'a mut self) -> Option<&'a [u8]> {
self.0.next().map(|(key, _)| key)
}
}
pub struct Intersection<'s>(raw::Intersection<'s>);
impl<'a, 's> Streamer<'a> for Intersection<'s> {
type Item = &'a [u8];
#[inline]
fn next(&'a mut self) -> Option<&'a [u8]> {
self.0.next().map(|(key, _)| key)
}
}
pub struct Difference<'s>(raw::Difference<'s>);
impl<'a, 's> Streamer<'a> for Difference<'s> {
type Item = &'a [u8];
#[inline]
fn next(&'a mut self) -> Option<&'a [u8]> {
self.0.next().map(|(key, _)| key)
}
}
pub struct SymmetricDifference<'s>(raw::SymmetricDifference<'s>);
impl<'a, 's> Streamer<'a> for SymmetricDifference<'s> {
type Item = &'a [u8];
#[inline]
fn next(&'a mut self) -> Option<&'a [u8]> {
self.0.next().map(|(key, _)| key)
}
}
struct StreamZeroOutput<S>(S);
impl<'a, S: Streamer<'a>> Streamer<'a> for StreamZeroOutput<S> {
type Item = (S::Item, raw::Output);
fn next(&'a mut self) -> Option<(S::Item, raw::Output)> {
self.0.next().map(|key| (key, raw::Output::zero()))
}
}
#[cfg(test)]
mod tests {
use super::OpBuilder;
use crate::Streamer;
#[test]
fn no_fsts() {
struct Iter<'a> {
i: usize,
xs: Vec<&'a [u8]>,
}
impl<'a> Iter<'a> {
fn new(xs: Vec<&'a [u8]>) -> Iter<'a> {
Iter { i: 0, xs }
}
}
impl<'a, 's> Streamer<'a> for Iter<'s> {
type Item = &'a [u8];
fn next(&'a mut self) -> Option<&'a [u8]> {
if self.i >= self.xs.len() {
None
} else {
let i = self.i;
self.i += 1;
Some(self.xs[i])
}
}
}
let mut stream = OpBuilder::new()
.add(Iter::new(vec![
&b"bar"[..],
&b"baz"[..],
&b"foo"[..],
&b"fubar"[..],
&b"quux"[..],
]))
.add(Iter::new(vec![&b"bar"[..], &b"foofoo"[..], &b"fubar"[..]]))
.intersection();
let mut got = vec![];
while let Some(x) = stream.next() {
got.push(x.to_vec());
}
assert_eq!(got, vec![&b"bar"[..], &b"fubar"[..]]);
}
}