use crate::{
database::Database,
error::{VectorscanCompileError, VectorscanRuntimeError},
flags::{ExtFlags, Flags, Mode},
hs,
};
use std::{
ffi::{CStr, CString},
fmt,
marker::PhantomData,
mem, ops,
os::raw::{c_char, c_uint, c_ulonglong},
ptr, slice, str,
};
/// An owned pattern string, stored as a NUL-terminated `CString` so that it can
/// be handed to the native library by pointer (see `Expression::as_ptr`).
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Expression(CString);
impl fmt::Display for Expression {
    /// Writes the pattern verbatim when it is valid UTF-8; otherwise writes a
    /// placeholder containing the debug rendering of the raw bytes.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let bytes = self.as_bytes();
        if let Ok(text) = str::from_utf8(bytes) {
            return f.write_str(text);
        }
        write!(f, "(non-utf8: {:?})", bytes)
    }
}
impl Expression {
    /// Returns the pattern bytes, without the trailing NUL terminator.
    pub fn as_bytes(&self) -> &[u8] {
        self.0.as_bytes()
    }

    /// Returns a pointer to the NUL-terminated pattern for use in native calls.
    ///
    /// The pointer borrows from `self` and must not outlive it.
    pub(crate) fn as_ptr(&self) -> *const c_char {
        self.0.as_ptr()
    }

    /// Builds an expression from anything convertible into a byte vector.
    ///
    /// # Errors
    /// Propagates the `CString::new` failure (converted into
    /// `VectorscanCompileError`), which occurs when the input contains an
    /// interior NUL byte.
    pub fn new(x: impl Into<Vec<u8>>) -> Result<Self, VectorscanCompileError> {
        let pattern = CString::new(x)?;
        Ok(Self(pattern))
    }

    /// Queries `hs_expression_info` for this pattern under `flags` and returns
    /// the parsed result.
    ///
    /// # Errors
    /// Returns whatever `copy_from_native_compile_error` produces from the
    /// native status code and compile-error pointer.
    pub fn info(&self, flags: Flags) -> Result<info::ExprInfo, VectorscanCompileError> {
        let mut native_info = ptr::null_mut();
        let mut native_err = ptr::null_mut();
        let status = unsafe {
            hs::hs_expression_info(
                self.as_ptr(),
                flags.into_native(),
                &mut native_info,
                &mut native_err,
            )
        };
        VectorscanRuntimeError::copy_from_native_compile_error(status, native_err)?;
        // Copy the struct out first, then release the native allocation.
        let parsed = unsafe {
            let parsed = info::ExprInfo::from_native(*native_info);
            crate::free_misc(native_info as *mut u8);
            parsed
        };
        Ok(parsed)
    }

    /// Same as [`Self::info`], but calls `hs_expression_ext_info` and also
    /// passes the extended parameters in `ext_flags`.
    ///
    /// # Errors
    /// Returns whatever `copy_from_native_compile_error` produces from the
    /// native status code and compile-error pointer.
    pub fn ext_info(
        &self,
        flags: Flags,
        ext_flags: &ExprExt,
    ) -> Result<info::ExprInfo, VectorscanCompileError> {
        let mut native_info = ptr::null_mut();
        let mut native_err = ptr::null_mut();
        let status = unsafe {
            hs::hs_expression_ext_info(
                self.as_ptr(),
                flags.into_native(),
                ext_flags.as_ref_native(),
                &mut native_info,
                &mut native_err,
            )
        };
        VectorscanRuntimeError::copy_from_native_compile_error(status, native_err)?;
        // Copy the struct out first, then release the native allocation.
        let parsed = unsafe {
            let parsed = info::ExprInfo::from_native(*native_info);
            crate::free_misc(native_info as *mut u8);
            parsed
        };
        Ok(parsed)
    }

    /// Compiles this single expression by delegating to `Database::compile`,
    /// passing `None` as the final argument.
    pub fn compile(&self, flags: Flags, mode: Mode) -> Result<Database, VectorscanCompileError> {
        Database::compile(self, flags, mode, None)
    }
}
impl str::FromStr for Expression {
type Err = VectorscanCompileError;
fn from_str(s: &str) -> Result<Self, Self::Err> { Self::new(s) }
}
/// An owned byte string compiled through `Database::compile_literal`.
///
/// Unlike `Expression`, the bytes live in a plain `Vec<u8>` with no NUL
/// terminator, so `Literal::new` accepts any input, including NUL bytes.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Literal(Vec<u8>);
impl fmt::Debug for Literal {
    /// Renders as `Literal("...")` when the bytes are valid UTF-8, and as
    /// `Literal([..bytes..])` otherwise.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let raw = self.as_bytes();
        if let Ok(text) = str::from_utf8(raw) {
            write!(f, "Literal({:?})", text)
        } else {
            write!(f, "Literal({:?})", raw)
        }
    }
}
impl fmt::Display for Literal {
    /// Writes the literal verbatim when it is valid UTF-8; otherwise writes a
    /// placeholder containing the debug rendering of the raw bytes.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let raw = self.as_bytes();
        if let Ok(text) = str::from_utf8(raw) {
            return f.write_str(text);
        }
        write!(f, "(non-utf8 literal: {:?})", raw)
    }
}
impl Literal {
pub fn as_bytes(&self) -> &[u8] { &self.0 }
pub(crate) fn as_ptr(&self) -> *const c_char {
unsafe { mem::transmute(self.as_bytes().as_ptr()) }
}
pub fn new(x: impl Into<Vec<u8>>) -> Result<Self, VectorscanCompileError> { Ok(Self(x.into())) }
pub fn compile(&self, flags: Flags, mode: Mode) -> Result<Database, VectorscanCompileError> {
Database::compile_literal(self, flags, mode, None)
}
}
impl str::FromStr for Literal {
type Err = VectorscanCompileError;
fn from_str(s: &str) -> Result<Self, Self::Err> { Self::new(s) }
}
/// Numeric identifier attached to an entry of an expression or literal set.
///
/// `#[repr(transparent)]` over `c_uint`: the set types hand a `Vec<ExprId>` to
/// native code as a `*const c_uint` (see their `ids_ptr` methods).
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct ExprId(pub c_uint);
/// A collection of borrowed `Expression`s laid out as parallel arrays of raw
/// pointers/values, ready to be passed to `Database::compile_multi`.
///
/// The optional arrays, when present, always have the same length as `ptrs`
/// (enforced by assertions in the `with_*` builders).
#[derive(Clone)]
pub struct ExpressionSet<'a> {
// Pointers obtained from `Expression::as_ptr` on expressions borrowed for `'a`.
ptrs: Vec<*const c_char>,
// Per-expression flags, if provided via `with_flags`.
flags: Option<Vec<Flags>>,
// Per-expression ids, if provided via `with_ids`.
ids: Option<Vec<ExprId>>,
// Per-expression extended parameters; a null entry means "none for this expression".
exts: Option<Vec<*const hs::hs_expr_ext>>,
// Ties the raw pointers above to the lifetime of the borrowed inputs.
_ph: PhantomData<&'a u8>,
}
impl<'a> fmt::Debug for ExpressionSet<'a> {
    /// Prints the patterns, flags, ids and extended parameters of the set.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Rebuild C-string views of the stored pattern pointers; they were taken
        // from `Expression`s borrowed for `'a` (see `from_exprs`).
        let mut exprs: Vec<&'a CStr> = Vec::with_capacity(self.ptrs.len());
        for &raw in self.ptrs.iter() {
            exprs.push(unsafe { CStr::from_ptr(raw) });
        }
        // View the nullable `hs_expr_ext` pointers as `Option<&ExprExt>`.
        // This relies on `ExprExt` being `#[repr(transparent)]` over
        // `hs_expr_ext`, and on `with_exts` keeping the vector the same length
        // as the pattern list.
        let exts: Option<&[Option<&ExprExt>]> = match self.exts.as_ref() {
            None => None,
            Some(raw_exts) => {
                Some(unsafe { slice::from_raw_parts(raw_exts.as_ptr().cast(), exprs.len()) })
            }
        };
        write!(
            f,
            "ExpressionSet(exprs={:?}, flags={:?}, ids={:?}, exts={:?})",
            exprs, &self.flags, &self.ids, exts,
        )
    }
}
impl<'a> ExpressionSet<'a> {
pub fn from_exprs(exprs: impl IntoIterator<Item=&'a Expression>) -> Self {
Self {
ptrs: exprs.into_iter().map(|e| e.as_ptr()).collect(),
flags: None,
ids: None,
exts: None,
_ph: PhantomData,
}
}
pub fn with_flags(mut self, flags: impl IntoIterator<Item=Flags>) -> Self {
let flags: Vec<_> = flags.into_iter().collect();
assert_eq!(self.len(), flags.len());
self.flags = Some(flags);
self
}
pub fn with_ids(mut self, ids: impl IntoIterator<Item=ExprId>) -> Self {
let ids: Vec<_> = ids.into_iter().collect();
assert_eq!(self.len(), ids.len());
self.ids = Some(ids);
self
}
pub fn with_exts(mut self, exts: impl IntoIterator<Item=Option<&'a ExprExt>>) -> Self {
let exts: Vec<*const hs::hs_expr_ext> = exts
.into_iter()
.map(|e| {
e.map(|e| e.as_ref_native() as *const hs::hs_expr_ext)
.unwrap_or(ptr::null())
})
.collect();
assert_eq!(self.len(), exts.len());
self.exts = Some(exts);
self
}
pub fn compile(self, mode: Mode) -> Result<Database, VectorscanCompileError> {
Database::compile_multi(&self, mode, None)
}
pub fn len(&self) -> usize { self.ptrs.len() }
pub fn is_empty(&self) -> bool { self.len() == 0 }
pub(crate) fn num_elements(&self) -> c_uint { self.len() as c_uint }
pub(crate) fn exts_ptr(&self) -> Option<*const *const hs::hs_expr_ext> {
self.exts.as_ref().map(|e| e.as_ptr())
}
pub(crate) fn expressions_ptr(&self) -> *const *const c_char { self.ptrs.as_ptr() }
pub(crate) fn flags_ptr(&self) -> *const c_uint {
self
.flags
.as_ref()
.map(|f| unsafe { mem::transmute(f.as_ptr()) })
.unwrap_or(ptr::null())
}
pub(crate) fn ids_ptr(&self) -> *const c_uint {
self
.ids
.as_ref()
.map(|i| unsafe { mem::transmute(i.as_ptr()) })
.unwrap_or(ptr::null())
}
}
pub mod info {
    use crate::hs;
    use displaydoc::Display;
    use std::os::raw::{c_char, c_uint};

    /// A pattern width reported by the native library, widened to `usize`.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
    #[repr(transparent)]
    pub struct ExprWidth(pub usize);

    impl ExprWidth {
        /// Wraps the native minimum width unchanged.
        pub(crate) const fn parse_min_width(x: c_uint) -> Self {
            Self(x as usize)
        }

        /// Wraps the native maximum width; the value `c_uint::MAX` is mapped
        /// to `None`.
        pub(crate) const fn parse_max_width(x: c_uint) -> Option<Self> {
            if x != c_uint::MAX {
                Some(Self(x as usize))
            } else {
                None
            }
        }
    }

    // Whether the native library flagged the pattern as able to report matches
    // out of order (derived from the `unordered_matches` field).
    //
    // NOTE(review): plain `//` comments are used on this enum and its variants
    // on purpose. `displaydoc::Display` builds its output from variant doc
    // comments, so adding `///` text here would change what `Display` prints.
    // No variant doc comments are visible in this view — confirm the intended
    // display strings before adding any.
    #[derive(
        Debug,
        Display,
        Copy,
        Clone,
        PartialEq,
        Eq,
        PartialOrd,
        Ord,
        Hash,
        num_enum::IntoPrimitive,
        num_enum::FromPrimitive,
    )]
    #[repr(i8)]
    pub enum UnorderedMatchBehavior {
        // Native value 0; also the `num_enum` fallback for unknown values.
        #[num_enum(default)]
        OnlyOrdered = 0,
        // Any non-zero native value.
        AllowsUnordered = 1,
    }

    impl UnorderedMatchBehavior {
        /// Maps the native flag byte: zero is `OnlyOrdered`, anything else is
        /// `AllowsUnordered`.
        pub(crate) const fn from_native(x: c_char) -> Self {
            match x {
                0 => Self::OnlyOrdered,
                _ => Self::AllowsUnordered,
            }
        }
    }

    // Combination of the native `matches_at_eod` / `matches_only_at_eod` flags.
    //
    // NOTE(review): `//` comments are deliberate here as well, for the same
    // `displaydoc::Display` reason as on `UnorderedMatchBehavior`.
    #[derive(Debug, Display, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
    #[repr(i8)]
    pub enum MatchAtEndBehavior {
        // Both native flags are zero.
        WillNeverMatchAtEOD,
        // `matches_at_eod` is set and `matches_only_at_eod` is zero.
        MayMatchAtEOD,
        // `matches_only_at_eod` is set, regardless of `matches_at_eod`.
        WillOnlyMatchAtEOD,
    }

    impl MatchAtEndBehavior {
        /// Folds the two native flag bytes into one value; a non-zero
        /// `matches_only_at_eod` takes precedence.
        pub(crate) fn from_native(matches_at_eod: c_char, matches_only_at_eod: c_char) -> Self {
            if matches_only_at_eod != 0 {
                Self::WillOnlyMatchAtEOD
            } else if matches_at_eod != 0 {
                Self::MayMatchAtEOD
            } else {
                Self::WillNeverMatchAtEOD
            }
        }
    }

    /// Parsed form of the native `hs_expr_info` struct.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct ExprInfo {
        /// Minimum width, as reported by the native library.
        pub min_width: ExprWidth,
        /// Maximum width, or `None` when the native value was `c_uint::MAX`.
        pub max_width: Option<ExprWidth>,
        /// Parsed `unordered_matches` flag.
        pub unordered_matches: UnorderedMatchBehavior,
        /// Parsed `matches_at_eod` / `matches_only_at_eod` flag pair.
        pub matches_at_eod: MatchAtEndBehavior,
    }

    impl ExprInfo {
        /// Converts each native field through its dedicated parser.
        pub(crate) fn from_native(x: hs::hs_expr_info) -> Self {
            Self {
                min_width: ExprWidth::parse_min_width(x.min_width),
                max_width: ExprWidth::parse_max_width(x.max_width),
                unordered_matches: UnorderedMatchBehavior::from_native(x.unordered_matches),
                matches_at_eod: MatchAtEndBehavior::from_native(
                    x.matches_at_eod,
                    x.matches_only_at_eod,
                ),
            }
        }
    }
}
/// Extended per-expression parameters: a thin wrapper around the native
/// `hs_expr_ext` struct.
///
/// `#[repr(transparent)]` so a `&ExprExt` can be viewed as a pointer to the
/// native struct (see `ExprExt::as_ref_native` and `ExpressionSet::with_exts`).
#[derive(Debug, Copy, Clone)]
#[repr(transparent)]
pub struct ExprExt(hs::hs_expr_ext);
impl Default for ExprExt {
fn default() -> Self { Self::zeroed() }
}
impl ExprExt {
    /// Returns a value whose every byte, including the flag word, is zero.
    pub fn zeroed() -> Self {
        // SAFETY: assumes the all-zero bit pattern is valid for `hs_expr_ext`;
        // every field touched in this impl is a plain integer.
        // NOTE(review): confirm the native struct has no non-zeroable fields.
        unsafe { mem::zeroed() }
    }

    /// Zeroed value with only the flag word set to `ext_flags`.
    fn zeroed_with_flags(ext_flags: ExtFlags) -> Self {
        let mut ext = Self::zeroed();
        ext.0.flags = ext_flags.into_native();
        ext
    }

    /// Sets `min_offset` to `x` and marks it active with `ExtFlags::MIN_OFFSET`.
    pub fn from_min_offset(x: usize) -> Self {
        let mut ext = Self::zeroed_with_flags(ExtFlags::MIN_OFFSET);
        ext.0.min_offset = x as c_ulonglong;
        ext
    }

    /// Sets `max_offset` to `x` and marks it active with `ExtFlags::MAX_OFFSET`.
    pub fn from_max_offset(x: usize) -> Self {
        let mut ext = Self::zeroed_with_flags(ExtFlags::MAX_OFFSET);
        ext.0.max_offset = x as c_ulonglong;
        ext
    }

    /// Sets `min_length` to `x` and marks it active with `ExtFlags::MIN_LENGTH`.
    pub fn from_min_length(x: usize) -> Self {
        let mut ext = Self::zeroed_with_flags(ExtFlags::MIN_LENGTH);
        ext.0.min_length = x as c_ulonglong;
        ext
    }

    /// Sets `edit_distance` to `x` and marks it active with
    /// `ExtFlags::EDIT_DISTANCE`.
    ///
    /// # Panics
    /// Panics unless `x` is strictly less than `c_uint::MAX`.
    pub fn from_edit_distance(x: usize) -> Self {
        let mut ext = Self::zeroed_with_flags(ExtFlags::EDIT_DISTANCE);
        assert!(x < c_uint::MAX as usize);
        ext.0.edit_distance = x as c_uint;
        ext
    }

    /// Sets `hamming_distance` to `x` and marks it active with
    /// `ExtFlags::HAMMING_DISTANCE`.
    ///
    /// # Panics
    /// Panics unless `x` is strictly less than `c_uint::MAX`.
    pub fn from_hamming_distance(x: usize) -> Self {
        let mut ext = Self::zeroed_with_flags(ExtFlags::HAMMING_DISTANCE);
        assert!(x < c_uint::MAX as usize);
        ext.0.hamming_distance = x as c_uint;
        ext
    }

    /// Decodes the native flag word.
    const fn ext_flags(&self) -> ExtFlags {
        ExtFlags::from_native(self.0.flags)
    }

    /// `min_offset`, but only when its flag bit is set.
    fn min_offset(&self) -> Option<c_ulonglong> {
        if !self.ext_flags().has_min_offset() {
            return None;
        }
        Some(self.0.min_offset)
    }

    /// `max_offset`, but only when its flag bit is set.
    fn max_offset(&self) -> Option<c_ulonglong> {
        if !self.ext_flags().has_max_offset() {
            return None;
        }
        Some(self.0.max_offset)
    }

    /// `min_length`, but only when its flag bit is set.
    fn min_length(&self) -> Option<c_ulonglong> {
        if !self.ext_flags().has_min_length() {
            return None;
        }
        Some(self.0.min_length)
    }

    /// `edit_distance`, but only when its flag bit is set.
    fn edit_distance(&self) -> Option<c_uint> {
        if !self.ext_flags().has_edit_distance() {
            return None;
        }
        Some(self.0.edit_distance)
    }

    /// `hamming_distance`, but only when its flag bit is set.
    fn hamming_distance(&self) -> Option<c_uint> {
        if !self.ext_flags().has_hamming_distance() {
            return None;
        }
        Some(self.0.hamming_distance)
    }

    /// Merges `rhs` into `self`: the flag words are OR-ed together, and every
    /// field that is active in `rhs` overwrites the corresponding field here.
    fn compose(mut self, rhs: Self) -> Self {
        let merged_flags = self.ext_flags() | rhs.ext_flags();
        if let Some(value) = rhs.min_offset() {
            self.0.min_offset = value;
        }
        if let Some(value) = rhs.max_offset() {
            self.0.max_offset = value;
        }
        if let Some(value) = rhs.min_length() {
            self.0.min_length = value;
        }
        if let Some(value) = rhs.edit_distance() {
            self.0.edit_distance = value;
        }
        if let Some(value) = rhs.hamming_distance() {
            self.0.hamming_distance = value;
        }
        self.0.flags = merged_flags.into_native();
        self
    }

    /// Borrows the wrapped native struct.
    pub(crate) fn as_ref_native(&self) -> &hs::hs_expr_ext {
        &self.0
    }
}
impl ops::BitOr for ExprExt {
type Output = Self;
fn bitor(self, other: Self) -> Self { self.compose(other) }
}
impl ops::BitOrAssign for ExprExt {
    /// `a |= b` stores `a | b` back into `a`.
    fn bitor_assign(&mut self, rhs: Self) {
        // `ExprExt` is `Copy`, so the current value can be read out by value.
        let current = *self;
        *self = current | rhs;
    }
}
/// A collection of borrowed `Literal`s laid out as parallel arrays, ready to be
/// passed to `Database::compile_multi_literal`.
///
/// `ptrs` and `lens` always have the same length; the optional arrays, when
/// present, match that length too (enforced by assertions in the `with_*`
/// builders).
#[derive(Clone)]
pub struct LiteralSet<'a> {
// Start of each literal's bytes, from `Literal::as_ptr` (not NUL-terminated).
ptrs: Vec<*const c_char>,
// Byte length of each literal, parallel to `ptrs`.
lens: Vec<usize>,
// Per-literal flags, if provided via `with_flags`.
flags: Option<Vec<Flags>>,
// Per-literal ids, if provided via `with_ids`.
ids: Option<Vec<ExprId>>,
// Ties the raw pointers above to the lifetime of the borrowed inputs.
_ph: PhantomData<&'a u8>,
}
impl<'a> fmt::Debug for LiteralSet<'a> {
    /// Prints every literal (quoted when valid UTF-8, as a byte list otherwise)
    /// followed by the optional flags and ids.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut rendered: Vec<String> = Vec::with_capacity(self.ptrs.len());
        for (&start, &len) in self.ptrs.iter().zip(self.lens.iter()) {
            // Each pointer/length pair was recorded from a `Literal` borrowed
            // for `'a` (see `from_lits`).
            let bytes: &'a [u8] = unsafe { slice::from_raw_parts(start as *const u8, len) };
            rendered.push(match str::from_utf8(bytes) {
                Ok(text) => format!("{:?}", text),
                Err(_) => format!("(non-utf8: {:?})", bytes),
            });
        }
        let joined_exprs = rendered.join(", ");
        write!(
            f,
            "LiteralSet(exprs=[{}], flags={:?}, ids={:?})",
            joined_exprs, &self.flags, &self.ids
        )
    }
}
impl<'a> LiteralSet<'a> {
pub fn from_lits(lits: impl IntoIterator<Item=&'a Literal>) -> Self {
let mut ptrs: Vec<_> = Vec::new();
let mut lens: Vec<_> = Vec::new();
for l in lits.into_iter() {
ptrs.push(l.as_ptr());
lens.push(l.as_bytes().len());
}
Self {
ptrs,
lens,
flags: None,
ids: None,
_ph: PhantomData,
}
}
pub fn with_flags(mut self, flags: impl IntoIterator<Item=Flags>) -> Self {
let flags: Vec<_> = flags.into_iter().collect();
assert_eq!(self.len(), flags.len());
self.flags = Some(flags.to_vec());
self
}
pub fn with_ids(mut self, ids: impl IntoIterator<Item=ExprId>) -> Self {
let ids: Vec<_> = ids.into_iter().collect();
assert_eq!(self.len(), ids.len());
self.ids = Some(ids.to_vec());
self
}
pub fn compile(self, mode: Mode) -> Result<Database, VectorscanCompileError> {
Database::compile_multi_literal(&self, mode, None)
}
pub fn len(&self) -> usize { self.ptrs.len() }
pub fn is_empty(&self) -> bool { self.len() == 0 }
pub(crate) fn num_elements(&self) -> c_uint { self.len() as c_uint }
pub(crate) fn literals_ptr(&self) -> *const *const c_char { self.ptrs.as_ptr() }
pub(crate) fn lengths_ptr(&self) -> *const usize { self.lens.as_ptr() }
pub(crate) fn flags_ptr(&self) -> *const c_uint {
self
.flags
.as_ref()
.map(|f| unsafe { mem::transmute(f.as_ptr()) })
.unwrap_or(ptr::null())
}
pub(crate) fn ids_ptr(&self) -> *const c_uint {
self
.ids
.as_ref()
.map(|i| unsafe { mem::transmute(i.as_ptr()) })
.unwrap_or(ptr::null())
}
}
#[cfg(feature = "chimera")]
#[cfg_attr(docsrs, doc(cfg(feature = "chimera")))]
pub mod chimera {
    use super::ExprId;
    use crate::{
        database::chimera::ChimeraDb,
        error::chimera::ChimeraCompileError,
        flags::chimera::{ChimeraFlags, ChimeraMode},
    };
    use std::{
        ffi::{CStr, CString},
        fmt,
        marker::PhantomData,
        os::raw::{c_char, c_uint, c_ulong},
        ptr, str,
    };

    /// An owned pattern string for the chimera backend, stored as a
    /// NUL-terminated `CString`.
    #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct ChimeraExpression(CString);

    impl fmt::Debug for ChimeraExpression {
        /// Renders as `ChimeraExpression("...")` when the bytes are valid
        /// UTF-8, and as `ChimeraExpression([..bytes..])` otherwise.
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            let raw = self.as_bytes();
            if let Ok(text) = str::from_utf8(raw) {
                write!(f, "ChimeraExpression({:?})", text)
            } else {
                write!(f, "ChimeraExpression({:?})", raw)
            }
        }
    }

    impl fmt::Display for ChimeraExpression {
        /// Writes the pattern verbatim when it is valid UTF-8; otherwise
        /// writes a placeholder containing the debug rendering of the bytes.
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            let raw = self.as_bytes();
            if let Ok(text) = str::from_utf8(raw) {
                return f.write_str(text);
            }
            write!(f, "(non-utf8: {:?})", raw)
        }
    }

    impl ChimeraExpression {
        /// Returns the pattern bytes, without the trailing NUL terminator.
        pub fn as_bytes(&self) -> &[u8] {
            self.0.as_bytes()
        }

        /// Returns a pointer to the NUL-terminated pattern for native calls.
        ///
        /// The pointer borrows from `self` and must not outlive it.
        pub(crate) fn as_ptr(&self) -> *const c_char {
            self.0.as_ptr()
        }

        /// Builds an expression from anything convertible into a byte vector.
        ///
        /// # Errors
        /// Propagates the `CString::new` failure (converted into
        /// `ChimeraCompileError`), which occurs when the input contains an
        /// interior NUL byte.
        pub fn new(x: impl Into<Vec<u8>>) -> Result<Self, ChimeraCompileError> {
            let pattern = CString::new(x)?;
            Ok(Self(pattern))
        }

        /// Compiles this single expression by delegating to
        /// `ChimeraDb::compile`, passing `None` as the final argument.
        pub fn compile(
            &self,
            flags: ChimeraFlags,
            mode: ChimeraMode,
        ) -> Result<ChimeraDb, ChimeraCompileError> {
            ChimeraDb::compile(self, flags, mode, None)
        }
    }

    impl str::FromStr for ChimeraExpression {
        type Err = ChimeraCompileError;

        /// Parses a string by forwarding it unchanged to
        /// `ChimeraExpression::new`.
        fn from_str(s: &str) -> Result<Self, Self::Err> {
            ChimeraExpression::new(s)
        }
    }

    /// Optional pair of limits carried by a `ChimeraExpressionSet`.
    ///
    /// NOTE(review): presumably forwarded to the native multi-pattern compile
    /// call by `ChimeraDb::compile_multi` — confirm against that function.
    #[derive(Debug, Copy, Clone)]
    pub struct ChimeraMatchLimits {
        /// Value for the native "match limit" setting.
        pub match_limit: c_ulong,
        /// Value for the native "match limit recursion" setting.
        pub match_limit_recursion: c_ulong,
    }

    /// A collection of borrowed `ChimeraExpression`s laid out as parallel
    /// arrays, ready to be passed to `ChimeraDb::compile_multi`.
    #[derive(Clone)]
    pub struct ChimeraExpressionSet<'a> {
        // Pointers from `ChimeraExpression::as_ptr`, valid for `'a`.
        ptrs: Vec<*const c_char>,
        // Per-expression flags, if provided via `with_flags`.
        flags: Option<Vec<ChimeraFlags>>,
        // Per-expression ids, if provided via `with_ids`.
        ids: Option<Vec<ExprId>>,
        // Set-wide limits, if provided via `with_limits`.
        limits: Option<ChimeraMatchLimits>,
        // Ties the raw pointers above to the lifetime of the borrowed inputs.
        _ph: PhantomData<&'a u8>,
    }

    impl<'a> fmt::Debug for ChimeraExpressionSet<'a> {
        /// Prints the patterns, flags, ids and limits of the set.
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            // Rebuild C-string views of the stored pattern pointers; they were
            // taken from expressions borrowed for `'a` (see `from_exprs`).
            let mut exprs: Vec<&'a CStr> = Vec::with_capacity(self.ptrs.len());
            for &raw in self.ptrs.iter() {
                exprs.push(unsafe { CStr::from_ptr(raw) });
            }
            write!(
                f,
                "ChimeraExpressionSet(exprs={:?}, flags={:?}, ids={:?}, limits={:?})",
                exprs, &self.flags, &self.ids, &self.limits
            )
        }
    }

    impl<'a> ChimeraExpressionSet<'a> {
        /// Creates a set from borrowed expressions, with no flags, ids or
        /// limits attached yet.
        pub fn from_exprs(exprs: impl IntoIterator<Item=&'a ChimeraExpression>) -> Self {
            let ptrs: Vec<*const c_char> =
                exprs.into_iter().map(ChimeraExpression::as_ptr).collect();
            Self {
                ptrs,
                flags: None,
                ids: None,
                limits: None,
                _ph: PhantomData,
            }
        }

        /// Attaches one `ChimeraFlags` value per expression.
        ///
        /// # Panics
        /// Panics if the number of flags differs from the number of
        /// expressions.
        pub fn with_flags(mut self, flags: impl IntoIterator<Item=ChimeraFlags>) -> Self {
            let flags: Vec<_> = flags.into_iter().collect();
            assert_eq!(self.len(), flags.len());
            self.flags = Some(flags);
            self
        }

        /// Attaches one `ExprId` per expression.
        ///
        /// # Panics
        /// Panics if the number of ids differs from the number of expressions.
        pub fn with_ids(mut self, ids: impl IntoIterator<Item=ExprId>) -> Self {
            let ids: Vec<_> = ids.into_iter().collect();
            assert_eq!(self.len(), ids.len());
            self.ids = Some(ids);
            self
        }

        /// Attaches set-wide match limits.
        pub fn with_limits(mut self, limits: ChimeraMatchLimits) -> Self {
            self.limits = Some(limits);
            self
        }

        /// Compiles the whole set by delegating to `ChimeraDb::compile_multi`,
        /// passing `None` as the final argument.
        pub fn compile(self, mode: ChimeraMode) -> Result<ChimeraDb, ChimeraCompileError> {
            ChimeraDb::compile_multi(&self, mode, None)
        }

        /// Number of expressions in the set.
        pub fn len(&self) -> usize {
            self.ptrs.len()
        }

        /// Whether the set contains no expressions.
        pub fn is_empty(&self) -> bool {
            self.ptrs.is_empty()
        }

        /// The limits attached via `with_limits`, if any.
        pub(crate) fn limits(&self) -> Option<ChimeraMatchLimits> {
            self.limits
        }

        /// Element count converted with `as` to the native unsigned type.
        pub(crate) fn num_elements(&self) -> c_uint {
            self.len() as c_uint
        }

        /// Pointer to the array of pattern pointers.
        pub(crate) fn expressions_ptr(&self) -> *const *const c_char {
            self.ptrs.as_ptr()
        }

        /// Pointer to the flags array viewed as `c_uint`s, or null if no flags
        /// were attached.
        pub(crate) fn flags_ptr(&self) -> *const c_uint {
            // NOTE(review): assumes `ChimeraFlags` has the same layout as
            // `c_uint` — confirm.
            self.flags
                .as_ref()
                .map_or(ptr::null(), |flags| flags.as_ptr().cast())
        }

        /// Pointer to the ids array viewed as `c_uint`s, or null if no ids
        /// were attached.
        pub(crate) fn ids_ptr(&self) -> *const c_uint {
            // `ExprId` is `#[repr(transparent)]` over `c_uint`.
            self.ids
                .as_ref()
                .map_or(ptr::null(), |ids| ids.as_ptr().cast())
        }
    }
}