lace/lib.rs
1//! A fast, extensible probabilistic cross-categorization engine.
2//!
3//!
4//! Lace is a probabilistic cross-categorization engine written in Rust with an
5//! optional interface to Python. Unlike traditional machine learning methods, which
6//! learn some function mapping inputs to outputs, Lace learns a joint probability
7//! distribution over your dataset, which enables users to...
8//!
9//! - predict or compute likelihoods of any number of features conditioned on any
10//! number of other features
11//! - identify, quantify, and attribute uncertainty from variance in the data,
12//! epistemic uncertainty in the model, and missing features
13//! - determine which variables are predictive of which others
14//! - determine which records/rows are similar to which others on the whole or
15//! given a specific context
16//! - simulate and manipulate synthetic data
17//! - work natively with missing data and make inferences about missingness
18//! (missing not-at-random)
19//! - work with continuous and categorical data natively, without transformation
20//! - identify anomalies, errors, and inconsistencies within the data
21//! - edit, backfill, and append data without retraining
22//!
23//! and more, all in one place, without any explicit model building.
24//!
25//!
26//! # Design
27//! Lace learns a probabilistic model of tabular data using cross-categorization.
28//! The general steps to operation are
29//!
30//! * Create a [`prelude::Codebook`] which describes your data. One can be
31//! autogenerated but it is best to check it before use.
32//! * Create an [`prelude::Engine`] with your data and codebook.
33//! * Train the [`prelude::Engine`] and monitor the model likelihood for
34//! convergence.
35//! * Ask questions via the [`prelude::OracleT`] implementation of [`prelude::Engine`] to explore your data.
36//!
37//!
38//! # Example
39//!
40//! (For a complete tutorial, see the [Lace Book](https://www.lace.dev/))
41//!
42//! The following example uses the pre-trained `animals` example dataset.
43//! Each row represents an animal and each column represents a feature of that
44//! animal.
45//! The feature is present if the cell value is 1 and is absent if the value is 0.
46//!
47//! First, we create an oracle from the example. Rows and columns are called
48//! out by name, in plain English, using strings.
49//!
50//! ```rust
51//! use lace::prelude::*;
52//! use lace::examples::Example;
53//!
54//! let oracle = Example::Animals.oracle().unwrap();
55//! ```
56//! Let's ask about the statistical dependence between whether something swims
57//! and whether it is fast or has flippers. We expect that swimming is more
58//! indicative of whether something has flippers than of whether it is fast,
59//! therefore we expect the dependence between swims and flippers to be higher.
60//!
61//! ```rust
62//! # use lace::prelude::*;
63//! # use lace::examples::Example;
64//! # let oracle = Example::Animals.oracle().unwrap();
65//! let depprob_fast = oracle.depprob(
66//! "swims",
67//! "fast",
68//! ).unwrap();
69//!
70//! let depprob_flippers = oracle.depprob(
71//! "swims",
72//! "flippers",
73//! ).unwrap();
74//!
75//! assert!(depprob_flippers > depprob_fast);
76//! ```
77//!
78//! We have the same expectation of mutual information. Mutual information
79//! requires more input from the user. We need to know what type of mutual
80//! information, and how many samples to take if we need to estimate the mutual
81//! information.
82//!
83//! ```rust
84//! # use lace::prelude::*;
85//! # use lace::examples::Example;
86//! # let oracle = Example::Animals.oracle().unwrap();
87//! let mut rng = rand::rng();
88//!
89//! let mi_fast = oracle.mi(
90//! "swims",
91//! "fast",
92//! 1000,
93//! MiType::Iqr,
94//! ).unwrap();
95//!
96//! let mi_flippers = oracle.mi(
97//! "swims",
98//! "flippers",
99//! 1000,
100//! MiType::Iqr,
101//! ).unwrap();
102//!
103//! assert!(mi_flippers > mi_fast);
104//! ```
105//!
106//! We can likewise ask about the similarity between rows -- in this case,
107//! animals.
108//!
109//! ```
110//! # use lace::prelude::*;
111//! # use lace::examples::Example;
112//! # let oracle = Example::Animals.oracle().unwrap();
113//! let wrt: Option<&[usize]> = None;
114//! let rowsim_wolf = oracle.rowsim(
115//! "wolf",
116//! "chihuahua",
117//! wrt,
118//! RowSimilarityVariant::ViewWeighted,
119//! ).unwrap();
120//!
121//! let rowsim_rat = oracle.rowsim(
122//! "rat",
123//! "chihuahua",
124//! wrt,
125//! RowSimilarityVariant::ViewWeighted,
126//! ).unwrap();
127//!
128//! assert!(rowsim_rat > rowsim_wolf);
129//! ```
130//!
131//! And we can add context to similarity.
132//!
133//! ```
134//! # use lace::prelude::*;
135//! # use lace::examples::Example;
136//! # let oracle = Example::Animals.oracle().unwrap();
137//! let context = vec!["swims"];
138//! let rowsim_otter = oracle.rowsim(
139//! "beaver",
140//! "otter",
141//! Some(&context),
142//! RowSimilarityVariant::ViewWeighted,
143//! ).unwrap();
144//!
145//! let rowsim_dolphin = oracle.rowsim(
146//! "beaver",
147//! "dolphin",
148//! Some(&context),
149//! RowSimilarityVariant::ViewWeighted,
150//! ).unwrap();
151//! ```
152//!
153//! # Feature flags
154//! - `formats`: create `Engine`s and `Codebook`s from IPC, CSV, JSON, and
155//! Parquet data files
156//! - `bencher`: Build benchmarking utilities
157//! - `ctrc_handler`: enables an update handler that captures Ctrl+C
158//!
159#![warn(unused_extern_crates)]
160#![warn(
161 clippy::all,
162 clippy::imprecise_flops,
163 clippy::suboptimal_flops,
164 clippy::unseparated_literal_suffix,
165 clippy::unreadable_literal,
166 clippy::option_option,
167 clippy::implicit_clone,
168 clippy::perf
169)]
170
171#[cfg(feature = "bencher")]
172pub mod bencher;
173#[cfg(feature = "examples")]
174pub mod examples;
175
176pub mod cc;
177pub mod codebook;
178pub mod config;
179pub mod consts;
180pub mod data;
181pub mod defaults;
182pub mod geweke;
183mod interface;
184pub mod metadata;
185pub mod misc;
186pub mod optimize;
187pub mod prelude;
188pub mod stats;
189pub mod utils;
190
191mod index;
192
193pub use config::EngineUpdateConfig;
194pub use index::*;
195pub use interface::update_handler;
196pub use interface::AppendStrategy;
197pub use interface::BuildEngineError;
198pub use interface::ConditionalEntropyType;
199pub use interface::DatalessOracle;
200pub use interface::Engine;
201pub use interface::EngineBuilder;
202pub use interface::Given;
203pub use interface::HasData;
204pub use interface::HasStates;
205pub use interface::InsertDataActions;
206pub use interface::InsertMode;
207pub use interface::Metadata;
208pub use interface::MiComponents;
209pub use interface::MiType;
210pub use interface::Oracle;
211pub use interface::OracleT;
212pub use interface::OverwriteMode;
213pub use interface::Row;
214pub use interface::RowSimilarityVariant;
215pub use interface::SupportExtension;
216pub use interface::Value;
217pub use interface::WriteMode;
218
/// Error types, re-exported from the private `interface` module.
219pub mod error {
220 pub use super::interface::error::*;
221}
222
223use std::fmt::Debug;
224
225use serde::Serialize;
226
227#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
228pub struct ParseError<T: Serialize + Debug + Clone + PartialEq + Eq>(T);
229
230impl<T> std::fmt::Display for ParseError<T>
231where
232 T: Serialize + Debug + Clone + PartialEq + Eq,
233{
234 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
235 write!(f, "{self:?}")
236 }
237}
238
239pub use cc::feature::FType;
240pub use cc::state::StateDiagnostics;
241pub use cc::transition::StateTransition;
242pub use data::Category;
243pub use data::Datum;
244pub use data::SummaryStatistics;
245pub use rv;