a_tree/lib.rs
1//! An implementation of the [A-Tree: A Dynamic Data Structure for Efficiently Indexing Arbitrary Boolean Expressions](https://dl.acm.org/doi/10.1145/3448016.3457266) paper.
2//!
3//! # Examples
4//!
5//! Searching for some boolean expressions:
6//!
7//! ```
8//! use a_tree::{ATree, AttributeDefinition};
9//! use std::collections::HashMap;
10//!
11//! // Create the A-Tree
12//! let mut atree = ATree::new(&[
13//! AttributeDefinition::string_list("deal_ids"),
14//! AttributeDefinition::integer("exchange_id"),
15//! AttributeDefinition::boolean("debug"),
16//! AttributeDefinition::integer_list("segment_ids"),
17//! ]).unwrap();
18//!
19//! let expression_1 = r#"deal_ids one of ["deal-1", "deal-2"]"#;
20//! let expression_2 = r#"segment_ids one of [1, 2, 3, 4]"#;
21//! // Insert the arbitrary boolean expressions
22//! let expressions_by_ids = vec![
23//! (1u64, expression_1),
24//! (2u64, expression_2)
25//! ];
26//! let mut mappings: HashMap<u64, &str> = HashMap::new();
27//! for (id, expression) in &expressions_by_ids {
28//! atree.insert(id, expression).unwrap();
29//! mappings.insert(*id, expression);
30//! }
31//!
32//! // Create an event
33//! let mut builder = atree.make_event();
34//! builder.with_string_list("deal_ids", &["deal-2"]).unwrap();
35//! builder.with_integer_list("segment_ids", &[1, 2]).unwrap();
36//! builder.with_boolean("debug", false).unwrap();
37//! let event = builder.build().unwrap();
38//!
39//! // Search for matching boolean expressions
40//! let report = atree.search(&event).unwrap();
41//! report.matches().iter().for_each(|id| {
42//! println!(r#"Found ID: {id}, Expression: "{}""#, mappings[id]);
43//! });
44//! ```
45//!
46//! # Domain Specific Language (DSL)
47//!
48//! The A-Tree crate support a DSL to allow easy creation of arbitrary boolean expressions (ABE).
49//! The following operators are supported:
50//!
51//! * Boolean operators: `and` (`&&`), `or` (`||`), `not` (`!`) and `variable` where `variable` is a defined attribute for the A-Tree;
52//! * Comparison: `<`, `<=`, `>`, `>=`. They work for `integer` and `float`;
53//! * Equality: `=` and `<>`. They work for `integer`, `float` and `string`;
54//! * Null: `is null`, `is not null` (for variables), `is empty` and `is not empty` (for lists);
55//! * Set: `in` and `not in`. They work for list of `integer` or for list of `string`;
56//! * List: `one of`, `none of` and `all of`. They work for list of `integer` and list of `string`.
57//!
58//! As an example, the following would all be valid ABEs:
59//!
60//! ```text
61//! (exchange_id = 1 and deals one of ["deal-1", "deal-2", "deal-3"]) and (segment_ids one of [1, 2, 3]) and (continent = 'NA' and country in ["US", "CA"])
62//! (exchange_id = 1 and deals one of ("deal-1", "deal-2", "deal-3")) and (segment_ids one of (1, 2, 3)) and country in ("IN")
63//! (log_level = 'debug') and (month in [1, 2, 3] and day in [15, 16]) or (month in [4, 5, 6] and day in [10, 11])
64//! ```
65//!
66//! # Optimizations
67//!
68//! The A-Tree is a data structure that can efficiently search a large amount of arbitrary boolean
69//! expressions for ones that match a specific event. To achieve this, there are a certain amount
70//! of things that the A-Tree will do:
71//!
72//! * Search for duplicated intermediary boolean expressions nodes (i.e. if there are two
73//! expressions such as `(A ∧ (B ∧ C))` and `(D ∨ (B ∧ C))`, the tree will find the common
74//! sub-expression `(B ∧ C)` and will make both expression refer to the common node);
75//! * Convert the strings to IDs to accelerate comparison and search;
76//! * Sort the lists of strings/integers and remove duplicates;
77//! * Sort the sub-expressions by cost:
78//! * variable substitution/null checks/empty checks < set operations < lists operations
79//! * the length of the lists has an impact on that cost too for set operations and lists
80//! operations;
81//! * the cost of binary boolean operators (OR and AND) are the combined cost of their
82//! sub-expressions;
83//! * Evaluate the predicates lazily while searching;
84//! * _Zero suppression filter_: Reduce the amount of nodes to evaluate by applying
85//! De Morgan's laws and eliminating the NOT nodes;
86//! * _Propagation on demand_: Choose an access child for the AND operators and only
87//! propagate the result if the access child is true.
88mod ast;
89mod atree;
90mod error;
91mod evaluation;
92mod events;
93mod lexer;
94mod parser;
95mod predicates;
96mod strings;
97#[cfg(test)]
98mod test_utils;
99
100pub use crate::{
101 atree::{ATree, Report},
102 error::ATreeError,
103 events::{AttributeDefinition, Event, EventBuilder, EventError},
104};