use std::collections::{HashMap, HashSet};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::RwLock;
use crate::{Rule, RuleAtom, Term};
#[derive(Debug, Clone)]
pub struct IndexConfig {
pub predicate_indexing: bool,
pub first_arg_indexing: bool,
pub combined_indexing: bool,
pub hash_threshold: usize,
pub collect_statistics: bool,
}
impl Default for IndexConfig {
fn default() -> Self {
Self {
predicate_indexing: true,
first_arg_indexing: true,
combined_indexing: true,
hash_threshold: 10,
collect_statistics: true,
}
}
}
impl IndexConfig {
pub fn with_predicate_indexing(mut self, enabled: bool) -> Self {
self.predicate_indexing = enabled;
self
}
pub fn with_first_arg_indexing(mut self, enabled: bool) -> Self {
self.first_arg_indexing = enabled;
self
}
pub fn with_combined_indexing(mut self, enabled: bool) -> Self {
self.combined_indexing = enabled;
self
}
pub fn with_hash_threshold(mut self, threshold: usize) -> Self {
self.hash_threshold = threshold;
self
}
pub fn with_statistics(mut self, enabled: bool) -> Self {
self.collect_statistics = enabled;
self
}
}
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct PredicateKey(pub String);
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct FirstArgKey {
pub predicate: String,
pub first_arg: Option<String>,
}
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct CombinedKey {
pub predicate: String,
pub subject_type: ArgType,
pub object_type: ArgType,
}
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum ArgType {
Variable,
Constant(String),
Any,
}
#[derive(Debug, Default)]
pub struct IndexStatistics {
pub total_lookups: AtomicU64,
pub predicate_hits: AtomicU64,
pub first_arg_hits: AtomicU64,
pub combined_hits: AtomicU64,
pub full_scans: AtomicU64,
pub rules_checked: AtomicU64,
pub rules_matched: AtomicU64,
}
impl IndexStatistics {
pub fn new() -> Self {
Self::default()
}
pub fn predicate_hit_rate(&self) -> f64 {
let total = self.total_lookups.load(Ordering::Relaxed);
if total == 0 {
return 0.0;
}
self.predicate_hits.load(Ordering::Relaxed) as f64 / total as f64
}
pub fn first_arg_hit_rate(&self) -> f64 {
let total = self.total_lookups.load(Ordering::Relaxed);
if total == 0 {
return 0.0;
}
self.first_arg_hits.load(Ordering::Relaxed) as f64 / total as f64
}
pub fn combined_hit_rate(&self) -> f64 {
let total = self.total_lookups.load(Ordering::Relaxed);
if total == 0 {
return 0.0;
}
self.combined_hits.load(Ordering::Relaxed) as f64 / total as f64
}
pub fn selectivity(&self) -> f64 {
let checked = self.rules_checked.load(Ordering::Relaxed);
if checked == 0 {
return 0.0;
}
self.rules_matched.load(Ordering::Relaxed) as f64 / checked as f64
}
pub fn reset(&self) {
self.total_lookups.store(0, Ordering::Relaxed);
self.predicate_hits.store(0, Ordering::Relaxed);
self.first_arg_hits.store(0, Ordering::Relaxed);
self.combined_hits.store(0, Ordering::Relaxed);
self.full_scans.store(0, Ordering::Relaxed);
self.rules_checked.store(0, Ordering::Relaxed);
self.rules_matched.store(0, Ordering::Relaxed);
}
pub fn snapshot(&self) -> IndexStatisticsSnapshot {
IndexStatisticsSnapshot {
total_lookups: self.total_lookups.load(Ordering::Relaxed),
predicate_hits: self.predicate_hits.load(Ordering::Relaxed),
first_arg_hits: self.first_arg_hits.load(Ordering::Relaxed),
combined_hits: self.combined_hits.load(Ordering::Relaxed),
full_scans: self.full_scans.load(Ordering::Relaxed),
rules_checked: self.rules_checked.load(Ordering::Relaxed),
rules_matched: self.rules_matched.load(Ordering::Relaxed),
}
}
}
#[derive(Debug, Clone)]
pub struct IndexStatisticsSnapshot {
pub total_lookups: u64,
pub predicate_hits: u64,
pub first_arg_hits: u64,
pub combined_hits: u64,
pub full_scans: u64,
pub rules_checked: u64,
pub rules_matched: u64,
}
pub type RuleId = usize;
#[derive(Debug)]
pub struct RuleIndex {
config: IndexConfig,
rules: RwLock<Vec<Rule>>,
predicate_index: RwLock<HashMap<PredicateKey, HashSet<RuleId>>>,
first_arg_index: RwLock<HashMap<FirstArgKey, HashSet<RuleId>>>,
combined_index: RwLock<HashMap<CombinedKey, HashSet<RuleId>>>,
statistics: IndexStatistics,
}
impl RuleIndex {
pub fn new(config: IndexConfig) -> Self {
Self {
config,
rules: RwLock::new(Vec::new()),
predicate_index: RwLock::new(HashMap::new()),
first_arg_index: RwLock::new(HashMap::new()),
combined_index: RwLock::new(HashMap::new()),
statistics: IndexStatistics::new(),
}
}
pub fn with_defaults() -> Self {
Self::new(IndexConfig::default())
}
pub fn add_rule(&self, rule: Rule) -> RuleId {
let mut rules = self.rules.write().expect("lock poisoned");
let rule_id = rules.len();
if self.config.predicate_indexing {
self.index_by_predicate(&rule, rule_id);
}
if self.config.first_arg_indexing {
self.index_by_first_arg(&rule, rule_id);
}
if self.config.combined_indexing {
self.index_by_combined(&rule, rule_id);
}
rules.push(rule);
rule_id
}
pub fn add_rules(&self, rules: Vec<Rule>) -> Vec<RuleId> {
rules.into_iter().map(|r| self.add_rule(r)).collect()
}
pub fn remove_rule(&self, rule_id: RuleId) -> Option<Rule> {
let mut rules = self.rules.write().expect("lock poisoned");
if rule_id >= rules.len() {
return None;
}
let rule = rules[rule_id].clone();
if self.config.predicate_indexing {
self.unindex_by_predicate(&rule, rule_id);
}
if self.config.first_arg_indexing {
self.unindex_by_first_arg(&rule, rule_id);
}
if self.config.combined_indexing {
self.unindex_by_combined(&rule, rule_id);
}
rules[rule_id] = Rule {
name: String::new(),
body: Vec::new(),
head: Vec::new(),
};
Some(rule)
}
pub fn find_rules_by_predicate(&self, predicate: &str) -> Vec<&Rule> {
if self.config.collect_statistics {
self.statistics
.total_lookups
.fetch_add(1, Ordering::Relaxed);
}
let rules = self.rules.read().expect("lock poisoned");
if self.config.predicate_indexing {
let pred_index = self.predicate_index.read().expect("lock poisoned");
let key = PredicateKey(predicate.to_string());
if let Some(rule_ids) = pred_index.get(&key) {
if self.config.collect_statistics {
self.statistics
.predicate_hits
.fetch_add(1, Ordering::Relaxed);
self.statistics
.rules_checked
.fetch_add(rule_ids.len() as u64, Ordering::Relaxed);
}
let result: Vec<*const Rule> = rule_ids
.iter()
.filter_map(|&id| {
let rule = rules.get(id)?;
if !rule.name.is_empty() {
Some(rule as *const Rule)
} else {
None
}
})
.collect();
drop(rules);
drop(pred_index);
return result.into_iter().map(|p| unsafe { &*p }).collect();
}
}
if self.config.collect_statistics {
self.statistics.full_scans.fetch_add(1, Ordering::Relaxed);
self.statistics
.rules_checked
.fetch_add(rules.len() as u64, Ordering::Relaxed);
}
Vec::new()
}
pub fn find_rules_for_triple(
&self,
subject: Option<&str>,
predicate: &str,
object: Option<&str>,
) -> Vec<RuleId> {
if self.config.collect_statistics {
self.statistics
.total_lookups
.fetch_add(1, Ordering::Relaxed);
}
if self.config.combined_indexing {
let combined = self.combined_index.read().expect("lock poisoned");
let key = CombinedKey {
predicate: predicate.to_string(),
subject_type: subject.map_or(ArgType::Any, |s| ArgType::Constant(s.to_string())),
object_type: object.map_or(ArgType::Any, |o| ArgType::Constant(o.to_string())),
};
if let Some(rule_ids) = combined.get(&key) {
if self.config.collect_statistics {
self.statistics
.combined_hits
.fetch_add(1, Ordering::Relaxed);
self.statistics
.rules_checked
.fetch_add(rule_ids.len() as u64, Ordering::Relaxed);
}
return rule_ids.iter().copied().collect();
}
}
if self.config.first_arg_indexing && subject.is_some() {
let first_arg = self.first_arg_index.read().expect("lock poisoned");
let key = FirstArgKey {
predicate: predicate.to_string(),
first_arg: subject.map(|s| s.to_string()),
};
if let Some(rule_ids) = first_arg.get(&key) {
if self.config.collect_statistics {
self.statistics
.first_arg_hits
.fetch_add(1, Ordering::Relaxed);
self.statistics
.rules_checked
.fetch_add(rule_ids.len() as u64, Ordering::Relaxed);
}
return rule_ids.iter().copied().collect();
}
}
if self.config.predicate_indexing {
let pred_index = self.predicate_index.read().expect("lock poisoned");
let key = PredicateKey(predicate.to_string());
if let Some(rule_ids) = pred_index.get(&key) {
if self.config.collect_statistics {
self.statistics
.predicate_hits
.fetch_add(1, Ordering::Relaxed);
self.statistics
.rules_checked
.fetch_add(rule_ids.len() as u64, Ordering::Relaxed);
}
return rule_ids.iter().copied().collect();
}
}
if self.config.collect_statistics {
self.statistics.full_scans.fetch_add(1, Ordering::Relaxed);
}
let rules = self.rules.read().expect("lock poisoned");
if self.config.collect_statistics {
self.statistics
.rules_checked
.fetch_add(rules.len() as u64, Ordering::Relaxed);
}
(0..rules.len())
.filter(|&id| !rules[id].name.is_empty())
.collect()
}
pub fn get_rule(&self, rule_id: RuleId) -> Option<Rule> {
let rules = self.rules.read().expect("lock poisoned");
rules.get(rule_id).filter(|r| !r.name.is_empty()).cloned()
}
pub fn get_all_rules(&self) -> Vec<Rule> {
let rules = self.rules.read().expect("lock poisoned");
rules
.iter()
.filter(|r| !r.name.is_empty())
.cloned()
.collect()
}
pub fn rule_count(&self) -> usize {
let rules = self.rules.read().expect("lock poisoned");
rules.iter().filter(|r| !r.name.is_empty()).count()
}
pub fn statistics(&self) -> &IndexStatistics {
&self.statistics
}
pub fn statistics_snapshot(&self) -> IndexStatisticsSnapshot {
self.statistics.snapshot()
}
pub fn reset_statistics(&self) {
self.statistics.reset();
}
pub fn clear(&self) {
let mut rules = self.rules.write().expect("lock poisoned");
let mut pred_index = self.predicate_index.write().expect("lock poisoned");
let mut first_arg = self.first_arg_index.write().expect("lock poisoned");
let mut combined = self.combined_index.write().expect("lock poisoned");
rules.clear();
pred_index.clear();
first_arg.clear();
combined.clear();
self.statistics.reset();
}
pub fn memory_usage(&self) -> usize {
let rules = self.rules.read().expect("lock poisoned");
let pred_index = self.predicate_index.read().expect("lock poisoned");
let first_arg = self.first_arg_index.read().expect("lock poisoned");
let combined = self.combined_index.read().expect("lock poisoned");
let rules_size = rules.len() * std::mem::size_of::<Rule>();
let pred_size = pred_index.len()
* (std::mem::size_of::<PredicateKey>() + std::mem::size_of::<HashSet<RuleId>>());
let first_arg_size = first_arg.len()
* (std::mem::size_of::<FirstArgKey>() + std::mem::size_of::<HashSet<RuleId>>());
let combined_size = combined.len()
* (std::mem::size_of::<CombinedKey>() + std::mem::size_of::<HashSet<RuleId>>());
rules_size + pred_size + first_arg_size + combined_size
}
fn index_by_predicate(&self, rule: &Rule, rule_id: RuleId) {
let mut pred_index = self.predicate_index.write().expect("lock poisoned");
for atom in &rule.body {
if let Some(predicate) = Self::extract_predicate(atom) {
let key = PredicateKey(predicate);
pred_index.entry(key).or_default().insert(rule_id);
}
}
}
fn unindex_by_predicate(&self, rule: &Rule, rule_id: RuleId) {
let mut pred_index = self.predicate_index.write().expect("lock poisoned");
for atom in &rule.body {
if let Some(predicate) = Self::extract_predicate(atom) {
let key = PredicateKey(predicate);
if let Some(ids) = pred_index.get_mut(&key) {
ids.remove(&rule_id);
if ids.is_empty() {
pred_index.remove(&key);
}
}
}
}
}
fn index_by_first_arg(&self, rule: &Rule, rule_id: RuleId) {
let mut first_arg_index = self.first_arg_index.write().expect("lock poisoned");
for atom in &rule.body {
if let (Some(predicate), first_arg) =
(Self::extract_predicate(atom), Self::extract_first_arg(atom))
{
let key = FirstArgKey {
predicate,
first_arg,
};
first_arg_index.entry(key).or_default().insert(rule_id);
}
}
}
fn unindex_by_first_arg(&self, rule: &Rule, rule_id: RuleId) {
let mut first_arg_index = self.first_arg_index.write().expect("lock poisoned");
for atom in &rule.body {
if let (Some(predicate), first_arg) =
(Self::extract_predicate(atom), Self::extract_first_arg(atom))
{
let key = FirstArgKey {
predicate,
first_arg,
};
if let Some(ids) = first_arg_index.get_mut(&key) {
ids.remove(&rule_id);
if ids.is_empty() {
first_arg_index.remove(&key);
}
}
}
}
}
fn index_by_combined(&self, rule: &Rule, rule_id: RuleId) {
let mut combined_index = self.combined_index.write().expect("lock poisoned");
for atom in &rule.body {
if let Some(key) = Self::extract_combined_key(atom) {
combined_index.entry(key).or_default().insert(rule_id);
}
}
}
fn unindex_by_combined(&self, rule: &Rule, rule_id: RuleId) {
let mut combined_index = self.combined_index.write().expect("lock poisoned");
for atom in &rule.body {
if let Some(key) = Self::extract_combined_key(atom) {
if let Some(ids) = combined_index.get_mut(&key) {
ids.remove(&rule_id);
if ids.is_empty() {
combined_index.remove(&key);
}
}
}
}
}
fn extract_predicate(atom: &RuleAtom) -> Option<String> {
match atom {
RuleAtom::Triple {
predicate: Term::Constant(c),
..
} => Some(c.clone()),
RuleAtom::Triple { .. } => None,
RuleAtom::Builtin { name, .. } => Some(name.clone()),
_ => None,
}
}
fn extract_first_arg(atom: &RuleAtom) -> Option<String> {
match atom {
RuleAtom::Triple {
subject: Term::Constant(c),
..
} => Some(c.clone()),
RuleAtom::Triple { .. } => None,
RuleAtom::Builtin { args, .. } => args.first().and_then(|arg| {
if let Term::Constant(c) = arg {
Some(c.clone())
} else {
None
}
}),
_ => None,
}
}
fn extract_combined_key(atom: &RuleAtom) -> Option<CombinedKey> {
match atom {
RuleAtom::Triple {
subject,
predicate,
object,
} => {
let predicate = match predicate {
Term::Constant(c) => c.clone(),
_ => return None,
};
let subject_type = match subject {
Term::Constant(c) => ArgType::Constant(c.clone()),
Term::Variable(_) => ArgType::Variable,
_ => ArgType::Any,
};
let object_type = match object {
Term::Constant(c) => ArgType::Constant(c.clone()),
Term::Variable(_) => ArgType::Variable,
_ => ArgType::Any,
};
Some(CombinedKey {
predicate,
subject_type,
object_type,
})
}
_ => None,
}
}
}
impl Default for RuleIndex {
fn default() -> Self {
Self::with_defaults()
}
}
pub struct RuleIndexBuilder {
config: IndexConfig,
rules: Vec<Rule>,
}
impl RuleIndexBuilder {
pub fn new() -> Self {
Self {
config: IndexConfig::default(),
rules: Vec::new(),
}
}
pub fn config(mut self, config: IndexConfig) -> Self {
self.config = config;
self
}
pub fn add_rule(mut self, rule: Rule) -> Self {
self.rules.push(rule);
self
}
pub fn add_rules(mut self, rules: Vec<Rule>) -> Self {
self.rules.extend(rules);
self
}
pub fn build(self) -> RuleIndex {
let index = RuleIndex::new(self.config);
for rule in self.rules {
index.add_rule(rule);
}
index
}
}
impl Default for RuleIndexBuilder {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DependencyEdge {
pub from: RuleId,
pub to: RuleId,
pub predicate: String,
}
#[derive(Debug, Default)]
pub struct RuleDependencyGraph {
edges: Vec<DependencyEdge>,
forward: HashMap<RuleId, Vec<RuleId>>,
backward: HashMap<RuleId, Vec<RuleId>>,
}
impl RuleDependencyGraph {
pub fn from_index(index: &RuleIndex) -> Self {
let rules = index.rules.read().expect("lock poisoned");
let mut graph = Self::default();
let mut head_preds: HashMap<String, Vec<RuleId>> = HashMap::new();
for (id, rule) in rules.iter().enumerate() {
if rule.name.is_empty() {
continue;
}
for atom in &rule.head {
if let Some(pred) = RuleIndex::extract_predicate(atom) {
head_preds.entry(pred).or_default().push(id);
}
}
}
for (consumer_id, rule) in rules.iter().enumerate() {
if rule.name.is_empty() {
continue;
}
for atom in &rule.body {
if let Some(pred) = RuleIndex::extract_predicate(atom) {
if let Some(producers) = head_preds.get(&pred) {
for &producer_id in producers {
if producer_id != consumer_id {
graph.add_edge(DependencyEdge {
from: producer_id,
to: consumer_id,
predicate: pred.clone(),
});
}
}
}
}
}
}
graph
}
fn add_edge(&mut self, edge: DependencyEdge) {
self.forward.entry(edge.from).or_default().push(edge.to);
self.backward.entry(edge.to).or_default().push(edge.from);
self.edges.push(edge);
}
pub fn triggered_by(&self, rule_id: RuleId) -> Vec<RuleId> {
self.forward.get(&rule_id).cloned().unwrap_or_default()
}
pub fn triggers_of(&self, rule_id: RuleId) -> Vec<RuleId> {
self.backward.get(&rule_id).cloned().unwrap_or_default()
}
pub fn edges(&self) -> &[DependencyEdge] {
&self.edges
}
pub fn edge_count(&self) -> usize {
self.edges.len()
}
pub fn roots(&self) -> Vec<RuleId> {
let all_targets: HashSet<RuleId> = self.edges.iter().map(|e| e.to).collect();
let all_sources: HashSet<RuleId> = self.edges.iter().map(|e| e.from).collect();
all_sources.difference(&all_targets).copied().collect()
}
pub fn has_cycle(&self) -> bool {
let all_nodes: HashSet<RuleId> = self.edges.iter().flat_map(|e| [e.from, e.to]).collect();
let mut visited = HashSet::new();
let mut on_stack = HashSet::new();
for &node in &all_nodes {
if !visited.contains(&node) && self.dfs_cycle(node, &mut visited, &mut on_stack) {
return true;
}
}
false
}
fn dfs_cycle(
&self,
node: RuleId,
visited: &mut HashSet<RuleId>,
on_stack: &mut HashSet<RuleId>,
) -> bool {
visited.insert(node);
on_stack.insert(node);
if let Some(neighbours) = self.forward.get(&node) {
for &next in neighbours {
if !visited.contains(&next) {
if self.dfs_cycle(next, visited, on_stack) {
return true;
}
} else if on_stack.contains(&next) {
return true;
}
}
}
on_stack.remove(&node);
false
}
pub fn topological_order(&self) -> Vec<RuleId> {
let all_nodes: HashSet<RuleId> = self.edges.iter().flat_map(|e| [e.from, e.to]).collect();
let mut in_degree: HashMap<RuleId, usize> = HashMap::new();
for &node in &all_nodes {
in_degree.entry(node).or_insert(0);
}
for edge in &self.edges {
*in_degree.entry(edge.to).or_insert(0) += 1;
}
let mut queue: Vec<RuleId> = in_degree
.iter()
.filter(|(_, °)| deg == 0)
.map(|(&id, _)| id)
.collect();
queue.sort_unstable();
let mut result = Vec::new();
while let Some(node) = queue.pop() {
result.push(node);
if let Some(neighbours) = self.forward.get(&node) {
for &next in neighbours {
if let Some(deg) = in_degree.get_mut(&next) {
*deg = deg.saturating_sub(1);
if *deg == 0 {
queue.push(next);
queue.sort_unstable();
}
}
}
}
}
if result.len() < all_nodes.len() {
Vec::new()
} else {
result
}
}
}
#[derive(Debug, Clone)]
pub struct PrioritizedRule {
pub id: RuleId,
pub name: String,
pub priority: i32,
}
#[derive(Debug, Default)]
pub struct PriorityIndex {
priorities: HashMap<RuleId, i32>,
}
impl PriorityIndex {
pub fn new() -> Self {
Self::default()
}
pub fn set_priority(&mut self, rule_id: RuleId, priority: i32) {
self.priorities.insert(rule_id, priority);
}
pub fn get_priority(&self, rule_id: RuleId) -> i32 {
self.priorities.get(&rule_id).copied().unwrap_or(0)
}
pub fn remove_priority(&mut self, rule_id: RuleId) {
self.priorities.remove(&rule_id);
}
pub fn sort_by_priority(&self, rule_ids: &mut [RuleId]) {
rule_ids.sort_by(|a, b| {
let pa = self.get_priority(*a);
let pb = self.get_priority(*b);
pb.cmp(&pa) });
}
pub fn ordered_rules(&self, index: &RuleIndex) -> Vec<PrioritizedRule> {
let rules = index.rules.read().expect("lock poisoned");
let mut result: Vec<PrioritizedRule> = rules
.iter()
.enumerate()
.filter(|(_, r)| !r.name.is_empty())
.map(|(id, r)| PrioritizedRule {
id,
name: r.name.clone(),
priority: self.get_priority(id),
})
.collect();
result.sort_by_key(|b| std::cmp::Reverse(b.priority));
result
}
pub fn len(&self) -> usize {
self.priorities.len()
}
pub fn is_empty(&self) -> bool {
self.priorities.is_empty()
}
pub fn clear(&mut self) {
self.priorities.clear();
}
}
pub fn wildcard_matches(pattern: &str, text: &str) -> bool {
if pattern == "*" {
return true;
}
if !pattern.contains('*') {
return pattern == text;
}
let parts: Vec<&str> = pattern.split('*').collect();
if parts.is_empty() {
return true;
}
let mut pos = 0usize;
for (i, part) in parts.iter().enumerate() {
if part.is_empty() {
continue;
}
if let Some(found) = text[pos..].find(part) {
if i == 0 && found != 0 {
return false;
}
pos += found + part.len();
} else {
return false;
}
}
if let Some(last) = parts.last() {
if !last.is_empty() && !text.ends_with(last) {
return false;
}
}
true
}
impl RuleIndex {
pub fn find_rules_by_wildcard(&self, pattern: &str) -> Vec<RuleId> {
let rules = self.rules.read().expect("lock poisoned");
let mut result = Vec::new();
for (id, rule) in rules.iter().enumerate() {
if rule.name.is_empty() {
continue;
}
for atom in &rule.body {
if let Some(pred) = Self::extract_predicate(atom) {
if wildcard_matches(pattern, &pred) {
result.push(id);
break;
}
}
}
}
result
}
pub fn dependency_graph(&self) -> RuleDependencyGraph {
RuleDependencyGraph::from_index(self)
}
pub fn reindex_rule(&self, rule_id: RuleId) -> bool {
let rules = self.rules.read().expect("lock poisoned");
let rule = match rules.get(rule_id) {
Some(r) if !r.name.is_empty() => r.clone(),
_ => return false,
};
drop(rules);
if self.config.predicate_indexing {
self.unindex_by_predicate(&rule, rule_id);
}
if self.config.first_arg_indexing {
self.unindex_by_first_arg(&rule, rule_id);
}
if self.config.combined_indexing {
self.unindex_by_combined(&rule, rule_id);
}
if self.config.predicate_indexing {
self.index_by_predicate(&rule, rule_id);
}
if self.config.first_arg_indexing {
self.index_by_first_arg(&rule, rule_id);
}
if self.config.combined_indexing {
self.index_by_combined(&rule, rule_id);
}
true
}
pub fn replace_rule(&self, rule_id: RuleId, new_rule: Rule) -> Option<Rule> {
let rules = self.rules.read().expect("lock poisoned");
if rule_id >= rules.len() || rules[rule_id].name.is_empty() {
return None;
}
let old_rule = rules[rule_id].clone();
drop(rules);
if self.config.predicate_indexing {
self.unindex_by_predicate(&old_rule, rule_id);
}
if self.config.first_arg_indexing {
self.unindex_by_first_arg(&old_rule, rule_id);
}
if self.config.combined_indexing {
self.unindex_by_combined(&old_rule, rule_id);
}
if self.config.predicate_indexing {
self.index_by_predicate(&new_rule, rule_id);
}
if self.config.first_arg_indexing {
self.index_by_first_arg(&new_rule, rule_id);
}
if self.config.combined_indexing {
self.index_by_combined(&new_rule, rule_id);
}
let mut rules = self.rules.write().expect("lock poisoned");
rules[rule_id] = new_rule;
Some(old_rule)
}
pub fn predicate_density(&self) -> f64 {
let pred_index = self.predicate_index.read().expect("lock poisoned");
if pred_index.is_empty() {
return 0.0;
}
let total_entries: usize = pred_index.values().map(|v| v.len()).sum();
total_entries as f64 / pred_index.len() as f64
}
}
#[cfg(test)]
mod tests {
use super::*;
fn create_test_rule(name: &str, pred: &str) -> Rule {
Rule {
name: name.to_string(),
body: vec![RuleAtom::Triple {
subject: Term::Variable("X".to_string()),
predicate: Term::Constant(pred.to_string()),
object: Term::Variable("Y".to_string()),
}],
head: vec![RuleAtom::Triple {
subject: Term::Variable("X".to_string()),
predicate: Term::Constant(format!("inferred_{pred}")),
object: Term::Variable("Y".to_string()),
}],
}
}
#[test]
fn test_add_and_find_rule() {
let index = RuleIndex::with_defaults();
let rule = create_test_rule("rule1", "parent");
let id = index.add_rule(rule);
assert_eq!(id, 0);
let found = index.find_rules_for_triple(None, "parent", None);
assert_eq!(found.len(), 1);
assert_eq!(found[0], 0);
}
#[test]
fn test_multiple_rules_same_predicate() {
let index = RuleIndex::with_defaults();
index.add_rule(create_test_rule("rule1", "parent"));
index.add_rule(create_test_rule("rule2", "parent"));
index.add_rule(create_test_rule("rule3", "child"));
let parent_rules = index.find_rules_for_triple(None, "parent", None);
assert_eq!(parent_rules.len(), 2);
let child_rules = index.find_rules_for_triple(None, "child", None);
assert_eq!(child_rules.len(), 1);
}
#[test]
fn test_remove_rule() {
let index = RuleIndex::with_defaults();
let id1 = index.add_rule(create_test_rule("rule1", "parent"));
let id2 = index.add_rule(create_test_rule("rule2", "parent"));
assert_eq!(index.rule_count(), 2);
let removed = index.remove_rule(id1);
assert!(removed.is_some());
assert_eq!(removed.expect("already checked is_some").name, "rule1");
assert_eq!(index.rule_count(), 1);
let found = index.find_rules_for_triple(None, "parent", None);
assert_eq!(found.len(), 1);
assert_eq!(found[0], id2);
}
#[test]
fn test_statistics() {
let config = IndexConfig::default().with_statistics(true);
let index = RuleIndex::new(config);
index.add_rule(create_test_rule("rule1", "parent"));
index.add_rule(create_test_rule("rule2", "child"));
index.find_rules_for_triple(None, "parent", None);
index.find_rules_for_triple(None, "child", None);
index.find_rules_for_triple(None, "unknown", None);
let stats = index.statistics_snapshot();
assert_eq!(stats.total_lookups, 3);
assert!(stats.predicate_hits >= 2); }
#[test]
fn test_clear() {
let index = RuleIndex::with_defaults();
index.add_rule(create_test_rule("rule1", "parent"));
index.add_rule(create_test_rule("rule2", "child"));
assert_eq!(index.rule_count(), 2);
index.clear();
assert_eq!(index.rule_count(), 0);
}
#[test]
fn test_builder() {
let index = RuleIndexBuilder::new()
.config(IndexConfig::default().with_combined_indexing(true))
.add_rule(create_test_rule("rule1", "parent"))
.add_rule(create_test_rule("rule2", "child"))
.build();
assert_eq!(index.rule_count(), 2);
}
#[test]
fn test_get_rule() {
let index = RuleIndex::with_defaults();
let rule = create_test_rule("rule1", "parent");
let id = index.add_rule(rule.clone());
let retrieved = index.get_rule(id);
assert!(retrieved.is_some());
assert_eq!(retrieved.expect("already checked is_some").name, "rule1");
let not_found = index.get_rule(999);
assert!(not_found.is_none());
}
#[test]
fn test_get_all_rules() {
let index = RuleIndex::with_defaults();
index.add_rule(create_test_rule("rule1", "parent"));
index.add_rule(create_test_rule("rule2", "child"));
index.add_rule(create_test_rule("rule3", "sibling"));
let all = index.get_all_rules();
assert_eq!(all.len(), 3);
}
#[test]
fn test_memory_usage() {
let index = RuleIndex::with_defaults();
for i in 0..100 {
index.add_rule(create_test_rule(
&format!("rule{i}"),
&format!("pred{}", i % 10),
));
}
let usage = index.memory_usage();
assert!(usage > 0);
}
#[test]
fn test_combined_key_indexing() {
let index = RuleIndex::new(IndexConfig::default().with_combined_indexing(true));
let rule_with_const = Rule {
name: "const_subject".to_string(),
body: vec![RuleAtom::Triple {
subject: Term::Constant("john".to_string()),
predicate: Term::Constant("knows".to_string()),
object: Term::Variable("Y".to_string()),
}],
head: vec![RuleAtom::Triple {
subject: Term::Constant("john".to_string()),
predicate: Term::Constant("friend".to_string()),
object: Term::Variable("Y".to_string()),
}],
};
index.add_rule(rule_with_const);
let found = index.find_rules_for_triple(Some("john"), "knows", None);
assert_eq!(found.len(), 1);
}
#[test]
fn test_first_arg_indexing() {
let config = IndexConfig::default()
.with_predicate_indexing(true)
.with_first_arg_indexing(true);
let index = RuleIndex::new(config);
index.add_rule(create_test_rule("rule1", "parent"));
let found = index.find_rules_for_triple(None, "parent", None);
assert!(!found.is_empty());
}
#[test]
fn test_statistics_reset() {
let index = RuleIndex::with_defaults();
index.add_rule(create_test_rule("rule1", "parent"));
index.find_rules_for_triple(None, "parent", None);
let stats_before = index.statistics_snapshot();
assert!(stats_before.total_lookups > 0);
index.reset_statistics();
let stats_after = index.statistics_snapshot();
assert_eq!(stats_after.total_lookups, 0);
}
#[test]
fn test_hit_rates() {
let index = RuleIndex::with_defaults();
index.add_rule(create_test_rule("rule1", "parent"));
index.add_rule(create_test_rule("rule2", "child"));
for _ in 0..10 {
index.find_rules_for_triple(None, "parent", None);
}
let stats = index.statistics();
assert!(stats.predicate_hit_rate() > 0.0);
}
#[test]
fn test_builtin_indexing() {
let index = RuleIndex::with_defaults();
let rule = Rule {
name: "builtin_rule".to_string(),
body: vec![
RuleAtom::Triple {
subject: Term::Variable("X".to_string()),
predicate: Term::Constant("hasAge".to_string()),
object: Term::Variable("Age".to_string()),
},
RuleAtom::Builtin {
name: "greaterThan".to_string(),
args: vec![
Term::Variable("Age".to_string()),
Term::Constant("18".to_string()),
],
},
],
head: vec![RuleAtom::Triple {
subject: Term::Variable("X".to_string()),
predicate: Term::Constant("isAdult".to_string()),
object: Term::Constant("true".to_string()),
}],
};
index.add_rule(rule);
let found = index.find_rules_for_triple(None, "hasAge", None);
assert_eq!(found.len(), 1);
}
fn create_chain_rule(name: &str, body_pred: &str, head_pred: &str) -> Rule {
Rule {
name: name.to_string(),
body: vec![RuleAtom::Triple {
subject: Term::Variable("X".to_string()),
predicate: Term::Constant(body_pred.to_string()),
object: Term::Variable("Y".to_string()),
}],
head: vec![RuleAtom::Triple {
subject: Term::Variable("X".to_string()),
predicate: Term::Constant(head_pred.to_string()),
object: Term::Variable("Y".to_string()),
}],
}
}
#[test]
fn test_dependency_graph_basic() {
let index = RuleIndex::with_defaults();
index.add_rule(create_chain_rule("r1", "parent", "ancestor"));
index.add_rule(create_chain_rule("r2", "ancestor", "reachable"));
let graph = index.dependency_graph();
assert_eq!(graph.edge_count(), 1);
let triggered = graph.triggered_by(0);
assert_eq!(triggered.len(), 1);
assert_eq!(triggered[0], 1);
}
#[test]
fn test_dependency_graph_triggers_of() {
let index = RuleIndex::with_defaults();
index.add_rule(create_chain_rule("r1", "parent", "ancestor"));
index.add_rule(create_chain_rule("r2", "ancestor", "reachable"));
let graph = index.dependency_graph();
let triggers = graph.triggers_of(1);
assert_eq!(triggers.len(), 1);
assert_eq!(triggers[0], 0);
}
#[test]
fn test_dependency_graph_roots() {
let index = RuleIndex::with_defaults();
index.add_rule(create_chain_rule("r1", "parent", "ancestor"));
index.add_rule(create_chain_rule("r2", "ancestor", "reachable"));
index.add_rule(create_chain_rule("r3", "reachable", "connected"));
let graph = index.dependency_graph();
let roots = graph.roots();
assert_eq!(roots.len(), 1);
assert_eq!(roots[0], 0);
}
#[test]
fn test_dependency_graph_no_cycle() {
let index = RuleIndex::with_defaults();
index.add_rule(create_chain_rule("r1", "parent", "ancestor"));
index.add_rule(create_chain_rule("r2", "ancestor", "reachable"));
let graph = index.dependency_graph();
assert!(!graph.has_cycle());
}
#[test]
fn test_dependency_graph_cycle() {
let index = RuleIndex::with_defaults();
index.add_rule(create_chain_rule("r1", "a", "b"));
index.add_rule(create_chain_rule("r2", "b", "a"));
let graph = index.dependency_graph();
assert!(graph.has_cycle());
}
#[test]
fn test_dependency_graph_topological_order() {
let index = RuleIndex::with_defaults();
index.add_rule(create_chain_rule("r1", "parent", "ancestor"));
index.add_rule(create_chain_rule("r2", "ancestor", "reachable"));
index.add_rule(create_chain_rule("r3", "reachable", "connected"));
let graph = index.dependency_graph();
let topo = graph.topological_order();
assert_eq!(topo.len(), 3);
let pos_r1 = topo.iter().position(|&x| x == 0).expect("r1 in topo");
let pos_r2 = topo.iter().position(|&x| x == 1).expect("r2 in topo");
let pos_r3 = topo.iter().position(|&x| x == 2).expect("r3 in topo");
assert!(pos_r1 < pos_r2);
assert!(pos_r2 < pos_r3);
}
#[test]
fn test_dependency_graph_topological_cycle_empty() {
let index = RuleIndex::with_defaults();
index.add_rule(create_chain_rule("r1", "a", "b"));
index.add_rule(create_chain_rule("r2", "b", "a"));
let graph = index.dependency_graph();
let topo = graph.topological_order();
assert!(topo.is_empty()); }
#[test]
fn test_dependency_graph_empty_index() {
let index = RuleIndex::with_defaults();
let graph = index.dependency_graph();
assert_eq!(graph.edge_count(), 0);
assert!(graph.roots().is_empty());
assert!(!graph.has_cycle());
}
#[test]
fn test_dependency_graph_self_no_cycle() {
let index = RuleIndex::with_defaults();
index.add_rule(create_test_rule("r1", "parent"));
let graph = index.dependency_graph();
assert_eq!(graph.edge_count(), 0);
}
#[test]
fn test_dependency_graph_edges() {
let index = RuleIndex::with_defaults();
index.add_rule(create_chain_rule("r1", "parent", "ancestor"));
index.add_rule(create_chain_rule("r2", "ancestor", "reachable"));
let graph = index.dependency_graph();
let edges = graph.edges();
assert_eq!(edges.len(), 1);
assert_eq!(edges[0].from, 0);
assert_eq!(edges[0].to, 1);
assert_eq!(edges[0].predicate, "ancestor");
}
#[test]
fn test_priority_set_and_get() {
let mut pi = PriorityIndex::new();
pi.set_priority(0, 10);
pi.set_priority(1, 5);
assert_eq!(pi.get_priority(0), 10);
assert_eq!(pi.get_priority(1), 5);
assert_eq!(pi.get_priority(99), 0); }
#[test]
fn test_priority_sort() {
let mut pi = PriorityIndex::new();
pi.set_priority(0, 1);
pi.set_priority(1, 10);
pi.set_priority(2, 5);
let mut ids = vec![0, 1, 2];
pi.sort_by_priority(&mut ids);
assert_eq!(ids, vec![1, 2, 0]); }
#[test]
fn test_priority_ordered_rules() {
let index = RuleIndex::with_defaults();
index.add_rule(create_test_rule("low", "p1"));
index.add_rule(create_test_rule("high", "p2"));
index.add_rule(create_test_rule("mid", "p3"));
let mut pi = PriorityIndex::new();
pi.set_priority(0, 1);
pi.set_priority(1, 100);
pi.set_priority(2, 50);
let ordered = pi.ordered_rules(&index);
assert_eq!(ordered.len(), 3);
assert_eq!(ordered[0].name, "high");
assert_eq!(ordered[1].name, "mid");
assert_eq!(ordered[2].name, "low");
}
#[test]
fn test_priority_remove() {
let mut pi = PriorityIndex::new();
pi.set_priority(0, 10);
assert_eq!(pi.len(), 1);
pi.remove_priority(0);
assert_eq!(pi.len(), 0);
assert_eq!(pi.get_priority(0), 0);
}
#[test]
fn test_priority_is_empty() {
let pi = PriorityIndex::new();
assert!(pi.is_empty());
}
#[test]
fn test_priority_clear() {
let mut pi = PriorityIndex::new();
pi.set_priority(0, 10);
pi.set_priority(1, 20);
pi.clear();
assert!(pi.is_empty());
}
#[test]
fn test_wildcard_star() {
assert!(wildcard_matches("*", "anything"));
assert!(wildcard_matches("*", ""));
}
#[test]
fn test_wildcard_exact() {
assert!(wildcard_matches("parent", "parent"));
assert!(!wildcard_matches("parent", "child"));
}
#[test]
fn test_wildcard_prefix() {
assert!(wildcard_matches("par*", "parent"));
assert!(!wildcard_matches("par*", "child"));
}
#[test]
fn test_wildcard_suffix() {
assert!(wildcard_matches("*ent", "parent"));
assert!(!wildcard_matches("*ent", "child"));
}
#[test]
fn test_wildcard_middle() {
assert!(wildcard_matches("p*t", "parent"));
assert!(wildcard_matches("p*t", "pet"));
assert!(!wildcard_matches("p*t", "patch")); }
#[test]
fn test_wildcard_find_rules() {
let index = RuleIndex::with_defaults();
index.add_rule(create_test_rule("r1", "parent"));
index.add_rule(create_test_rule("r2", "child"));
index.add_rule(create_test_rule("r3", "partner"));
let found = index.find_rules_by_wildcard("par*");
assert_eq!(found.len(), 2); }
#[test]
fn test_wildcard_star_all() {
let index = RuleIndex::with_defaults();
index.add_rule(create_test_rule("r1", "parent"));
index.add_rule(create_test_rule("r2", "child"));
let found = index.find_rules_by_wildcard("*");
assert_eq!(found.len(), 2);
}
#[test]
fn test_reindex_rule() {
let index = RuleIndex::with_defaults();
let id = index.add_rule(create_test_rule("r1", "parent"));
assert!(index.reindex_rule(id));
let found = index.find_rules_for_triple(None, "parent", None);
assert_eq!(found.len(), 1);
}
#[test]
fn test_reindex_invalid_id() {
let index = RuleIndex::with_defaults();
assert!(!index.reindex_rule(999));
}
#[test]
fn test_replace_rule() {
let index = RuleIndex::with_defaults();
let id = index.add_rule(create_test_rule("r1", "parent"));
let old = index.replace_rule(id, create_test_rule("r1_new", "child"));
assert!(old.is_some());
assert_eq!(old.expect("old rule").name, "r1");
let retrieved = index.get_rule(id);
assert!(retrieved.is_some());
assert_eq!(retrieved.expect("rule exists").name, "r1_new");
let found_child = index.find_rules_for_triple(None, "child", None);
assert!(found_child.contains(&id));
let found_by_pred = index.find_rules_by_predicate("parent");
assert!(found_by_pred.is_empty());
}
#[test]
fn test_replace_rule_invalid_id() {
let index = RuleIndex::with_defaults();
let result = index.replace_rule(999, create_test_rule("r1", "parent"));
assert!(result.is_none());
}
#[test]
fn test_predicate_density() {
let index = RuleIndex::with_defaults();
assert_eq!(index.predicate_density(), 0.0);
index.add_rule(create_test_rule("r1", "parent"));
index.add_rule(create_test_rule("r2", "parent"));
index.add_rule(create_test_rule("r3", "child"));
let density = index.predicate_density();
assert!(density > 1.0);
}
#[test]
fn test_add_rules_batch() {
let index = RuleIndex::with_defaults();
let ids = index.add_rules(vec![
create_test_rule("r1", "parent"),
create_test_rule("r2", "child"),
create_test_rule("r3", "sibling"),
]);
assert_eq!(ids.len(), 3);
assert_eq!(index.rule_count(), 3);
}
#[test]
fn test_remove_nonexistent_rule() {
let index = RuleIndex::with_defaults();
let result = index.remove_rule(999);
assert!(result.is_none());
}
}