#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use crate::{StarTerm, StarTriple};
use tracing::trace;
#[derive(Default)]
pub struct SimdPredicateMatcher;
impl SimdPredicateMatcher {
pub fn new() -> Self {
Self
}
pub fn matches(&self, predicate_iri: &str, pattern: &str, exact: bool) -> bool {
if exact {
self.matches_exact(predicate_iri, pattern)
} else {
self.matches_substring(predicate_iri, pattern)
}
}
fn matches_exact(&self, predicate_iri: &str, pattern: &str) -> bool {
if predicate_iri.len() != pattern.len() {
return false;
}
#[cfg(target_arch = "x86_64")]
{
if is_x86_feature_detected!("sse4.2") {
return unsafe { self.equals_simd(predicate_iri.as_bytes(), pattern.as_bytes()) };
}
}
predicate_iri == pattern
}
fn matches_substring(&self, predicate_iri: &str, pattern: &str) -> bool {
if pattern == "*" {
return true; }
#[cfg(target_arch = "x86_64")]
{
if is_x86_feature_detected!("sse4.2") {
return unsafe { self.contains_simd(predicate_iri.as_bytes(), pattern.as_bytes()) };
}
}
predicate_iri.contains(pattern)
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn equals_simd(&self, a: &[u8], b: &[u8]) -> bool {
let len = a.len();
let mut i = 0;
while i + 16 <= len {
let chunk_a = _mm_loadu_si128(a.as_ptr().add(i) as *const __m128i);
let chunk_b = _mm_loadu_si128(b.as_ptr().add(i) as *const __m128i);
let cmp = _mm_cmpeq_epi8(chunk_a, chunk_b);
let mask = _mm_movemask_epi8(cmp);
if mask != 0xFFFF {
return false;
}
i += 16;
}
a[i..] == b[i..]
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn contains_simd(&self, haystack: &[u8], needle: &[u8]) -> bool {
if needle.is_empty() {
return true;
}
if needle.len() > haystack.len() {
return false;
}
let needle_first = needle[0];
let needle_len = needle.len();
let mut i = 0;
while i + 16 <= haystack.len() {
let chunk = _mm_loadu_si128(haystack.as_ptr().add(i) as *const __m128i);
let needle_vec = _mm_set1_epi8(needle_first as i8);
let cmp = _mm_cmpeq_epi8(chunk, needle_vec);
let mask = _mm_movemask_epi8(cmp) as u16;
if mask != 0 {
for bit in 0..16 {
if (mask & (1 << bit)) != 0 {
let pos = i + bit;
if pos + needle_len <= haystack.len()
&& &haystack[pos..pos + needle_len] == needle
{
return true;
}
}
}
}
i += 16;
}
haystack[i..].windows(needle_len).any(|w| w == needle)
}
pub fn filter_by_predicate(
&self,
triples: &[StarTriple],
pattern: &str,
exact: bool,
) -> Vec<usize> {
let mut matches = Vec::new();
for (idx, triple) in triples.iter().enumerate() {
if let StarTerm::NamedNode(nn) = &triple.predicate {
if self.matches(&nn.iri, pattern, exact) {
matches.push(idx);
}
}
}
trace!(
"Batch filter: {} matches from {} triples",
matches.len(),
triples.len()
);
matches
}
}
#[derive(Default)]
pub struct SimdCepSequenceMatcher;
impl SimdCepSequenceMatcher {
pub fn new() -> Self {
Self
}
pub fn match_sequence(
&self,
predicates: &[String],
pattern_sequence: &[String],
ordered: bool,
) -> Option<Vec<usize>> {
if ordered {
self.match_ordered_sequence(predicates, pattern_sequence)
} else {
self.match_unordered_sequence(predicates, pattern_sequence)
}
}
fn match_ordered_sequence(
&self,
predicates: &[String],
pattern_sequence: &[String],
) -> Option<Vec<usize>> {
let mut matched_indices = Vec::new();
let mut pattern_idx = 0;
for (pred_idx, predicate) in predicates.iter().enumerate() {
if pattern_idx >= pattern_sequence.len() {
break;
}
let pattern = &pattern_sequence[pattern_idx];
if self.predicate_matches(predicate, pattern) {
matched_indices.push(pred_idx);
pattern_idx += 1;
}
}
if matched_indices.len() == pattern_sequence.len() {
Some(matched_indices)
} else {
None
}
}
fn match_unordered_sequence(
&self,
predicates: &[String],
pattern_sequence: &[String],
) -> Option<Vec<usize>> {
let mut matched_indices = Vec::new();
let mut remaining_patterns: Vec<_> = pattern_sequence.iter().collect();
for (pred_idx, predicate) in predicates.iter().enumerate() {
if remaining_patterns.is_empty() {
break;
}
if let Some(pos) = remaining_patterns
.iter()
.position(|pattern| self.predicate_matches(predicate, pattern))
{
matched_indices.push(pred_idx);
remaining_patterns.remove(pos);
}
}
if remaining_patterns.is_empty() {
Some(matched_indices)
} else {
None
}
}
fn predicate_matches(&self, predicate: &str, pattern: &str) -> bool {
if pattern == "*" {
return true; }
#[cfg(target_arch = "x86_64")]
{
if is_x86_feature_detected!("sse4.2") {
return unsafe { self.contains_simd(predicate.as_bytes(), pattern.as_bytes()) };
}
}
predicate.contains(pattern)
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn contains_simd(&self, haystack: &[u8], needle: &[u8]) -> bool {
if needle.is_empty() {
return true;
}
if needle.len() > haystack.len() {
return false;
}
let needle_first = needle[0];
let needle_len = needle.len();
let mut i = 0;
while i + 16 <= haystack.len() {
let chunk = _mm_loadu_si128(haystack.as_ptr().add(i) as *const __m128i);
let needle_vec = _mm_set1_epi8(needle_first as i8);
let cmp = _mm_cmpeq_epi8(chunk, needle_vec);
let mask = _mm_movemask_epi8(cmp) as u16;
if mask != 0 {
for bit in 0..16 {
if (mask & (1 << bit)) != 0 {
let pos = i + bit;
if pos + needle_len <= haystack.len()
&& &haystack[pos..pos + needle_len] == needle
{
return true;
}
}
}
}
i += 16;
}
haystack[i..].windows(needle_len).any(|w| w == needle)
}
}
#[derive(Default)]
pub struct SimdQuotedTripleFilter;
impl SimdQuotedTripleFilter {
pub fn new() -> Self {
Self
}
pub fn filter_quoted_triples(
&self,
triples: &[StarTriple],
subject_quoted: bool,
object_quoted: bool,
) -> Vec<usize> {
let mut matches = Vec::new();
for (idx, triple) in triples.iter().enumerate() {
let has_subject_quoted = matches!(triple.subject, StarTerm::QuotedTriple(_));
let has_object_quoted = matches!(triple.object, StarTerm::QuotedTriple(_));
if (subject_quoted && has_subject_quoted) || (object_quoted && has_object_quoted) {
matches.push(idx);
}
}
trace!(
"Quoted triple filter: {} matches from {} triples",
matches.len(),
triples.len()
);
matches
}
pub fn count_quoted(&self, triples: &[StarTriple]) -> usize {
let count = triples
.iter()
.filter(|t| {
matches!(t.subject, StarTerm::QuotedTriple(_))
|| matches!(t.object, StarTerm::QuotedTriple(_))
})
.count();
count
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::StarTerm;
#[test]
fn test_simd_predicate_matcher_exact() {
let matcher = SimdPredicateMatcher::new();
let iri = "http://example.org/predicate";
assert!(matcher.matches(iri, "http://example.org/predicate", true));
assert!(!matcher.matches(iri, "http://example.org/other", true));
}
#[test]
fn test_simd_predicate_matcher_substring() {
let matcher = SimdPredicateMatcher::new();
let iri = "http://example.org/predicate";
assert!(matcher.matches(iri, "predicate", false));
assert!(matcher.matches(iri, "example", false));
assert!(matcher.matches(iri, "*", false)); assert!(!matcher.matches(iri, "notfound", false));
}
#[test]
fn test_simd_predicate_matcher_long_strings() {
let matcher = SimdPredicateMatcher::new();
let long_iri = "http://example.org/very/long/predicate/path/that/exceeds/sixteen/bytes";
let pattern = "very/long/predicate";
assert!(matcher.matches(long_iri, pattern, false));
}
#[test]
fn test_simd_predicate_batch_filter() {
let matcher = SimdPredicateMatcher::new();
let triples = vec![
StarTriple::new(
StarTerm::iri("http://ex.org/s1").unwrap(),
StarTerm::iri("http://ex.org/pred1").unwrap(),
StarTerm::literal("value1").unwrap(),
),
StarTriple::new(
StarTerm::iri("http://ex.org/s2").unwrap(),
StarTerm::iri("http://ex.org/pred2").unwrap(),
StarTerm::literal("value2").unwrap(),
),
StarTriple::new(
StarTerm::iri("http://ex.org/s3").unwrap(),
StarTerm::iri("http://ex.org/pred1").unwrap(),
StarTerm::literal("value3").unwrap(),
),
];
let matches = matcher.filter_by_predicate(&triples, "pred1", false);
assert_eq!(matches, vec![0, 2]);
}
#[test]
fn test_simd_cep_ordered_sequence() {
let matcher = SimdCepSequenceMatcher::new();
let predicates = vec![
"http://ex.org/step1".to_string(),
"http://ex.org/step2".to_string(),
"http://ex.org/step3".to_string(),
];
let pattern = vec![
"step1".to_string(),
"step2".to_string(),
"step3".to_string(),
];
let result = matcher.match_sequence(&predicates, &pattern, true);
assert_eq!(result, Some(vec![0, 1, 2]));
}
#[test]
fn test_simd_cep_ordered_sequence_no_match() {
let matcher = SimdCepSequenceMatcher::new();
let predicates = vec![
"http://ex.org/step1".to_string(),
"http://ex.org/step3".to_string(), "http://ex.org/step2".to_string(),
];
let pattern = vec![
"step1".to_string(),
"step2".to_string(),
"step3".to_string(),
];
let result = matcher.match_sequence(&predicates, &pattern, true);
assert_eq!(result, None); }
#[test]
fn test_simd_cep_unordered_sequence() {
let matcher = SimdCepSequenceMatcher::new();
let predicates = vec![
"http://ex.org/step3".to_string(),
"http://ex.org/step1".to_string(),
"http://ex.org/step2".to_string(),
];
let pattern = vec![
"step1".to_string(),
"step2".to_string(),
"step3".to_string(),
];
let result = matcher.match_sequence(&predicates, &pattern, false);
assert!(result.is_some());
let indices = result.unwrap();
assert_eq!(indices.len(), 3);
}
#[test]
fn test_simd_cep_wildcard_pattern() {
let matcher = SimdCepSequenceMatcher::new();
let predicates = vec![
"http://ex.org/anything".to_string(),
"http://ex.org/step2".to_string(),
];
let pattern = vec!["*".to_string(), "step2".to_string()];
let result = matcher.match_sequence(&predicates, &pattern, true);
assert_eq!(result, Some(vec![0, 1]));
}
#[test]
fn test_simd_quoted_triple_filter() {
let filter = SimdQuotedTripleFilter::new();
let qt = StarTerm::quoted_triple(StarTriple::new(
StarTerm::iri("http://ex.org/qs").unwrap(),
StarTerm::iri("http://ex.org/qp").unwrap(),
StarTerm::literal("qv").unwrap(),
));
let triples = vec![
StarTriple::new(
qt.clone(),
StarTerm::iri("http://ex.org/meta").unwrap(),
StarTerm::literal("value").unwrap(),
),
StarTriple::new(
StarTerm::iri("http://ex.org/s").unwrap(),
StarTerm::iri("http://ex.org/p").unwrap(),
qt.clone(),
),
StarTriple::new(
StarTerm::iri("http://ex.org/s2").unwrap(),
StarTerm::iri("http://ex.org/p2").unwrap(),
StarTerm::literal("v2").unwrap(),
),
];
let subject_matches = filter.filter_quoted_triples(&triples, true, false);
assert_eq!(subject_matches, vec![0]);
let object_matches = filter.filter_quoted_triples(&triples, false, true);
assert_eq!(object_matches, vec![1]);
let any_matches = filter.filter_quoted_triples(&triples, true, true);
assert_eq!(any_matches, vec![0, 1]);
let count = filter.count_quoted(&triples);
assert_eq!(count, 2);
}
}