use anyhow::Result;
use memmap::Mmap;
use std::path::Path;
use std::{collections::BTreeMap, marker::PhantomData};
pub use tangram_macro::{
predict_input, ClassificationOutputValue, PredictInput, PredictInputValue,
};
use url::Url;
/// A loaded model together with the state needed to log events for it.
///
/// `Input` and `Output` are the caller-facing types accepted and returned by the
/// prediction methods; this struct only records them through `PhantomData` markers.
pub struct Model<Input = PredictInput, Output = PredictOutput>
where
Input: Into<PredictInput>,
Output: From<PredictOutput> + Into<PredictOutput>,
{
/// The core model that predictions are delegated to.
model: tangram_core::predict::Model,
/// Events pushed by the `enqueue_*` methods, held until `flush_log_queue` is called.
log_queue: Vec<Event>,
/// Base URL that log events are posted to.
tangram_url: Url,
/// Zero-sized marker for the `Input` type parameter.
input_marker: PhantomData<Input>,
/// Zero-sized marker for the `Output` type parameter.
output_marker: PhantomData<Output>,
}
/// Options accepted when loading a [`Model`].
pub struct LoadModelOptions {
/// Base URL used for logging; when `None`, loading falls back to
/// `https://app.tangram.dev`.
pub tangram_url: Option<Url>,
}
/// Input to a prediction: an ordered map from string keys to input values.
#[derive(Clone, Debug, serde::Serialize)]
pub struct PredictInput(pub BTreeMap<String, PredictInputValue>);
impl From<PredictInput> for tangram_core::predict::PredictInput {
fn from(value: PredictInput) -> tangram_core::predict::PredictInput {
tangram_core::predict::PredictInput(
value
.0
.into_iter()
.map(|(key, value)| (key, value.into()))
.collect(),
)
}
}
/// A single prediction input value: either a number or a string.
///
/// The `untagged` attribute makes it serialize as the bare number or string.
#[derive(Clone, Debug, serde::Serialize)]
#[serde(untagged)]
pub enum PredictInputValue {
/// A numeric value.
Number(f64),
/// A textual value.
String(String),
}
/// Maps each [`PredictInputValue`] variant onto the same-named core variant.
impl From<PredictInputValue> for tangram_core::predict::PredictInputValue {
    fn from(value: PredictInputValue) -> Self {
        match value {
            PredictInputValue::Number(number) => Self::Number(number),
            PredictInputValue::String(text) => Self::String(text),
        }
    }
}
impl From<f64> for PredictInputValue {
fn from(value: f64) -> Self {
PredictInputValue::Number(value)
}
}
impl From<f32> for PredictInputValue {
fn from(value: f32) -> Self {
PredictInputValue::Number(value as f64)
}
}
impl From<String> for PredictInputValue {
fn from(value: String) -> Self {
PredictInputValue::String(value)
}
}
impl From<&str> for PredictInputValue {
fn from(value: &str) -> Self {
PredictInputValue::String(value.to_owned())
}
}
/// Optional overrides for a prediction.
///
/// When converted to the core options type, fields left as `None` keep the
/// core crate's default value.
#[derive(Clone, Debug, serde::Serialize)]
pub struct PredictOptions {
/// Overrides the core `threshold` option when `Some`.
pub threshold: Option<f32>,
/// Overrides the core `compute_feature_contributions` option when `Some`.
pub compute_feature_contributions: Option<bool>,
}
impl From<PredictOptions> for tangram_core::predict::PredictOptions {
fn from(value: PredictOptions) -> tangram_core::predict::PredictOptions {
let mut options = tangram_core::predict::PredictOptions::default();
if let Some(threshold) = value.threshold {
options.threshold = threshold;
}
if let Some(compute_feature_contributions) = value.compute_feature_contributions {
options.compute_feature_contributions = compute_feature_contributions;
}
options
}
}
/// Output of a prediction, one variant per kind of task.
///
/// The `untagged` attribute makes it serialize as the inner output without a wrapper.
#[derive(Debug, serde::Serialize)]
#[serde(untagged)]
pub enum PredictOutput {
/// Output of a regression prediction.
Regression(RegressionPredictOutput),
/// Output of a binary classification prediction (class name as `String`).
BinaryClassification(BinaryClassificationPredictOutput),
/// Output of a multiclass classification prediction (class name as `String`).
MulticlassClassification(MulticlassClassificationPredictOutput),
}
impl From<RegressionPredictOutput> for PredictOutput {
fn from(value: RegressionPredictOutput) -> Self {
PredictOutput::Regression(value)
}
}
/// Wraps a typed binary classification output in
/// [`PredictOutput::BinaryClassification`], turning the class name back into a `String`.
impl<T> From<BinaryClassificationPredictOutput<T>> for PredictOutput
where
    T: ClassificationOutputValue,
{
    fn from(value: BinaryClassificationPredictOutput<T>) -> Self {
        let BinaryClassificationPredictOutput {
            class_name,
            probability,
            feature_contributions,
        } = value;
        let class_name = class_name.as_str().to_owned();
        Self::BinaryClassification(BinaryClassificationPredictOutput {
            class_name,
            probability,
            feature_contributions,
        })
    }
}
/// Wraps a typed multiclass classification output in
/// [`PredictOutput::MulticlassClassification`], turning the class name back into a `String`.
impl<T> From<MulticlassClassificationPredictOutput<T>> for PredictOutput
where
    T: ClassificationOutputValue,
{
    fn from(value: MulticlassClassificationPredictOutput<T>) -> Self {
        let MulticlassClassificationPredictOutput {
            class_name,
            probability,
            probabilities,
            feature_contributions,
        } = value;
        let class_name = class_name.as_str().to_owned();
        Self::MulticlassClassification(MulticlassClassificationPredictOutput {
            class_name,
            probability,
            probabilities,
            feature_contributions,
        })
    }
}
/// Converts a core predict output into this crate's [`PredictOutput`],
/// variant by variant.
impl From<tangram_core::predict::PredictOutput> for PredictOutput {
    fn from(value: tangram_core::predict::PredictOutput) -> Self {
        use tangram_core::predict::PredictOutput as CoreOutput;
        match value {
            CoreOutput::Regression(output) => Self::Regression(output.into()),
            CoreOutput::BinaryClassification(output) => Self::BinaryClassification(output.into()),
            CoreOutput::MulticlassClassification(output) => {
                Self::MulticlassClassification(output.into())
            }
        }
    }
}
/// Output of a regression prediction.
#[derive(Debug, serde::Serialize)]
pub struct RegressionPredictOutput {
/// The value reported by the core regression output.
pub value: f32,
/// Feature contributions, present only when the core output carried them.
pub feature_contributions: Option<FeatureContributions>,
}
impl From<tangram_core::predict::RegressionPredictOutput> for RegressionPredictOutput {
fn from(value: tangram_core::predict::RegressionPredictOutput) -> Self {
RegressionPredictOutput {
value: value.value,
feature_contributions: value.feature_contributions.map(Into::into),
}
}
}
/// Extracts the regression output from a [`PredictOutput`].
///
/// # Panics
///
/// Panics if the output is not the `Regression` variant.
impl From<PredictOutput> for RegressionPredictOutput {
    fn from(value: PredictOutput) -> Self {
        if let PredictOutput::Regression(output) = value {
            output
        } else {
            panic!("expected regression predict output")
        }
    }
}
/// Extracts the multiclass classification output from a [`PredictOutput`],
/// converting the class name into `T`.
///
/// # Panics
///
/// Panics if the output is not the `MulticlassClassification` variant.
impl<T> From<PredictOutput> for MulticlassClassificationPredictOutput<T>
where
    T: ClassificationOutputValue,
{
    fn from(value: PredictOutput) -> Self {
        let output = match value {
            PredictOutput::MulticlassClassification(output) => output,
            _ => panic!("expected multiclass classification predict output"),
        };
        let class_name = T::from_str(&output.class_name);
        Self {
            class_name,
            probability: output.probability,
            probabilities: output.probabilities,
            feature_contributions: output.feature_contributions,
        }
    }
}
/// A type that can stand in for a classification class name.
///
/// Implementors convert to and from the string form of the class name.
pub trait ClassificationOutputValue {
    /// Builds a value from the string form of a class name.
    fn from_str(value: &str) -> Self;
    /// Returns the string form of this value.
    fn as_str(&self) -> &str;
}

/// `String` represents a class name as-is.
impl ClassificationOutputValue for String {
    fn from_str(value: &str) -> Self {
        String::from(value)
    }

    fn as_str(&self) -> &str {
        &self[..]
    }
}
/// Output of a binary classification prediction.
///
/// `T` is the representation of the class name and defaults to `String`.
#[derive(Debug, serde::Serialize)]
pub struct BinaryClassificationPredictOutput<T = String>
where
T: ClassificationOutputValue,
{
/// The class name reported by the core output, converted into `T`.
pub class_name: T,
/// The probability reported by the core output
/// (presumably the probability of `class_name` — TODO confirm).
pub probability: f32,
/// Feature contributions, present only when the core output carried them.
pub feature_contributions: Option<FeatureContributions>,
}
impl<T> From<tangram_core::predict::BinaryClassificationPredictOutput>
for BinaryClassificationPredictOutput<T>
where
T: ClassificationOutputValue,
{
fn from(value: tangram_core::predict::BinaryClassificationPredictOutput) -> Self {
BinaryClassificationPredictOutput {
class_name: T::from_str(&value.class_name),
probability: value.probability,
feature_contributions: value.feature_contributions.map(Into::into),
}
}
}
/// Extracts and converts the binary classification output from a core predict output.
///
/// # Panics
///
/// Panics if the core output is not the `BinaryClassification` variant.
impl<T> From<tangram_core::predict::PredictOutput> for BinaryClassificationPredictOutput<T>
where
    T: ClassificationOutputValue,
{
    fn from(value: tangram_core::predict::PredictOutput) -> Self {
        if let tangram_core::predict::PredictOutput::BinaryClassification(output) = value {
            output.into()
        } else {
            panic!("expected binary classification predict output")
        }
    }
}
/// Extracts the binary classification output from a [`PredictOutput`],
/// converting the class name into `T`.
///
/// # Panics
///
/// Panics if the output is not the `BinaryClassification` variant.
impl<T> From<PredictOutput> for BinaryClassificationPredictOutput<T>
where
    T: ClassificationOutputValue,
{
    fn from(value: PredictOutput) -> Self {
        let output = match value {
            PredictOutput::BinaryClassification(output) => output,
            _ => panic!("expected binary classification predict output"),
        };
        let class_name = T::from_str(&output.class_name);
        Self {
            class_name,
            probability: output.probability,
            feature_contributions: output.feature_contributions,
        }
    }
}
/// Output of a multiclass classification prediction.
///
/// `T` is the representation of the class name and defaults to `String`.
#[derive(Debug, serde::Serialize)]
pub struct MulticlassClassificationPredictOutput<T = String>
where
T: ClassificationOutputValue,
{
/// The class name reported by the core output, converted into `T`.
pub class_name: T,
/// The probability reported by the core output
/// (presumably the probability of `class_name` — TODO confirm).
pub probability: f32,
/// Probabilities keyed by string (presumably one entry per class name — TODO confirm).
pub probabilities: BTreeMap<String, f32>,
/// Feature contributions keyed by string, present only when the core output carried them.
pub feature_contributions: Option<BTreeMap<String, FeatureContributions>>,
}
impl<T> From<tangram_core::predict::MulticlassClassificationPredictOutput>
for MulticlassClassificationPredictOutput<T>
where
T: ClassificationOutputValue,
{
fn from(value: tangram_core::predict::MulticlassClassificationPredictOutput) -> Self {
MulticlassClassificationPredictOutput {
class_name: T::from_str(&value.class_name),
probability: value.probability,
probabilities: value.probabilities,
feature_contributions: value.feature_contributions.map(|feature_contributions| {
feature_contributions
.into_iter()
.map(|(key, value)| (key, value.into()))
.collect()
}),
}
}
}
/// Extracts and converts the multiclass classification output from a core predict output.
///
/// # Panics
///
/// Panics if the core output is not the `MulticlassClassification` variant.
impl<T> From<tangram_core::predict::PredictOutput> for MulticlassClassificationPredictOutput<T>
where
    T: ClassificationOutputValue,
{
    fn from(value: tangram_core::predict::PredictOutput) -> Self {
        if let tangram_core::predict::PredictOutput::MulticlassClassification(output) = value {
            output.into()
        } else {
            panic!("expected multiclass classification predict output")
        }
    }
}
/// Feature contributions attached to a prediction output.
///
/// All fields are copied or converted from the core crate's type of the same name.
#[derive(Debug, serde::Serialize)]
pub struct FeatureContributions {
/// The core `baseline_value`
/// (presumably the output before any feature contribution is applied — TODO confirm).
pub baseline_value: f32,
/// The core `output_value`.
pub output_value: f32,
/// One entry per contribution reported by the core crate.
pub entries: Vec<FeatureContributionEntry>,
}
impl From<tangram_core::predict::FeatureContributions> for FeatureContributions {
fn from(value: tangram_core::predict::FeatureContributions) -> Self {
FeatureContributions {
baseline_value: value.baseline_value,
output_value: value.output_value,
entries: value.entries.into_iter().map(Into::into).collect(),
}
}
}
/// A single feature contribution, one variant per kind of feature.
///
/// Serialized internally tagged: a `type` field carries the renamed variant
/// name given on each variant below.
#[derive(Debug, serde::Serialize)]
#[serde(tag = "type")]
pub enum FeatureContributionEntry {
/// Contribution of an identity feature.
#[serde(rename = "identity")]
Identity(IdentityFeatureContribution),
/// Contribution of a normalized feature.
#[serde(rename = "normalized")]
Normalized(NormalizedFeatureContribution),
/// Contribution of a one hot encoded feature.
#[serde(rename = "one_hot_encoded")]
OneHotEncoded(OneHotEncodedFeatureContribution),
/// Contribution of a bag of words feature.
#[serde(rename = "bag_of_words")]
BagOfWords(BagOfWordsFeatureContribution),
/// Contribution of a bag of words cosine similarity feature.
#[serde(rename = "bag_of_words_cosine_similarity")]
BagOfWordsCosineSimilarity(BagOfWordsCosineSimilarityFeatureContribution),
/// Contribution of a word embedding feature.
#[serde(rename = "word_embedding")]
WordEmbedding(WordEmbeddingFeatureContribution),
}
/// Converts a core feature contribution entry into the same-named variant of
/// this crate's [`FeatureContributionEntry`].
impl From<tangram_core::predict::FeatureContributionEntry> for FeatureContributionEntry {
    fn from(value: tangram_core::predict::FeatureContributionEntry) -> Self {
        use tangram_core::predict::FeatureContributionEntry as CoreEntry;
        match value {
            CoreEntry::Identity(entry) => Self::Identity(entry.into()),
            CoreEntry::Normalized(entry) => Self::Normalized(entry.into()),
            CoreEntry::OneHotEncoded(entry) => Self::OneHotEncoded(entry.into()),
            CoreEntry::BagOfWords(entry) => Self::BagOfWords(entry.into()),
            CoreEntry::BagOfWordsCosineSimilarity(entry) => {
                Self::BagOfWordsCosineSimilarity(entry.into())
            }
            CoreEntry::WordEmbedding(entry) => Self::WordEmbedding(entry.into()),
        }
    }
}
/// Contribution of an identity feature; fields mirror the core crate's type of the same name.
#[derive(Debug, serde::Serialize)]
pub struct IdentityFeatureContribution {
/// Name of the column the feature belongs to.
pub column_name: String,
/// The feature's value as reported by the core crate.
pub feature_value: f32,
/// The contribution value reported by the core crate for this feature.
pub feature_contribution_value: f32,
}
/// Copies every field from the core identity feature contribution.
impl From<tangram_core::predict::IdentityFeatureContribution> for IdentityFeatureContribution {
    fn from(value: tangram_core::predict::IdentityFeatureContribution) -> Self {
        let feature_value = value.feature_value;
        let feature_contribution_value = value.feature_contribution_value;
        Self {
            column_name: value.column_name,
            feature_value,
            feature_contribution_value,
        }
    }
}
/// Contribution of a normalized feature; fields mirror the core crate's type of the same name.
#[derive(Debug, serde::Serialize)]
pub struct NormalizedFeatureContribution {
/// Name of the column the feature belongs to.
pub column_name: String,
/// The feature's value as reported by the core crate.
pub feature_value: f32,
/// The contribution value reported by the core crate for this feature.
pub feature_contribution_value: f32,
}
/// Copies every field from the core normalized feature contribution.
impl From<tangram_core::predict::NormalizedFeatureContribution> for NormalizedFeatureContribution {
    fn from(value: tangram_core::predict::NormalizedFeatureContribution) -> Self {
        let feature_value = value.feature_value;
        let feature_contribution_value = value.feature_contribution_value;
        Self {
            column_name: value.column_name,
            feature_value,
            feature_contribution_value,
        }
    }
}
/// Contribution of a one hot encoded feature; fields mirror the core crate's type of the same name.
#[derive(Debug, serde::Serialize)]
pub struct OneHotEncodedFeatureContribution {
/// Name of the column the feature belongs to.
pub column_name: String,
/// The variant this feature encodes, if any
/// (the meaning of `None` is not visible here — TODO confirm against the core crate).
pub variant: Option<String>,
/// The feature's boolean value as reported by the core crate.
pub feature_value: bool,
/// The contribution value reported by the core crate for this feature.
pub feature_contribution_value: f32,
}
/// Copies every field from the core one hot encoded feature contribution.
impl From<tangram_core::predict::OneHotEncodedFeatureContribution>
    for OneHotEncodedFeatureContribution
{
    fn from(value: tangram_core::predict::OneHotEncodedFeatureContribution) -> Self {
        let feature_value = value.feature_value;
        let feature_contribution_value = value.feature_contribution_value;
        Self {
            column_name: value.column_name,
            variant: value.variant,
            feature_value,
            feature_contribution_value,
        }
    }
}
/// Contribution of a bag of words feature; fields mirror the core crate's type of the same name.
#[derive(Debug, serde::Serialize)]
pub struct BagOfWordsFeatureContribution {
/// Name of the column the feature belongs to.
pub column_name: String,
/// The n-gram this feature refers to.
pub ngram: NGram,
/// The feature's value as reported by the core crate.
pub feature_value: f32,
/// The contribution value reported by the core crate for this feature.
pub feature_contribution_value: f32,
}
impl From<tangram_core::predict::BagOfWordsFeatureContribution> for BagOfWordsFeatureContribution {
fn from(value: tangram_core::predict::BagOfWordsFeatureContribution) -> Self {
BagOfWordsFeatureContribution {
column_name: value.column_name,
ngram: value.ngram.into(),
feature_value: value.feature_value,
feature_contribution_value: value.feature_contribution_value,
}
}
}
/// An n-gram of one or two tokens.
///
/// The `untagged` attribute makes it serialize as the bare payload, without
/// the variant name.
#[derive(Debug, serde::Serialize)]
#[serde(untagged)]
pub enum NGram {
/// A single token.
Unigram(String),
/// A pair of tokens.
Bigram(String, String),
}
/// Converts a core n-gram into the same-named variant of this crate's [`NGram`].
impl From<tangram_core::predict::NGram> for NGram {
    fn from(value: tangram_core::predict::NGram) -> Self {
        use tangram_core::predict::NGram as CoreNGram;
        match value {
            CoreNGram::Unigram(token) => Self::Unigram(token),
            CoreNGram::Bigram(first, second) => Self::Bigram(first, second),
        }
    }
}
/// Contribution of a bag of words cosine similarity feature; fields mirror the
/// core crate's type of the same name.
#[derive(Debug, serde::Serialize)]
pub struct BagOfWordsCosineSimilarityFeatureContribution {
/// Name of the first column involved in the feature.
pub column_name_a: String,
/// Name of the second column involved in the feature.
pub column_name_b: String,
/// The feature's value as reported by the core crate.
pub feature_value: f32,
/// The contribution value reported by the core crate for this feature.
pub feature_contribution_value: f32,
}
/// Copies every field from the core bag of words cosine similarity feature contribution.
impl From<tangram_core::predict::BagOfWordsCosineSimilarityFeatureContribution>
    for BagOfWordsCosineSimilarityFeatureContribution
{
    fn from(value: tangram_core::predict::BagOfWordsCosineSimilarityFeatureContribution) -> Self {
        let feature_value = value.feature_value;
        let feature_contribution_value = value.feature_contribution_value;
        Self {
            column_name_a: value.column_name_a,
            column_name_b: value.column_name_b,
            feature_value,
            feature_contribution_value,
        }
    }
}
/// Contribution of a word embedding feature; fields mirror the core crate's type of the same name.
#[derive(Debug, serde::Serialize)]
pub struct WordEmbeddingFeatureContribution {
/// Name of the column the feature belongs to.
pub column_name: String,
/// Index reported by the core crate
/// (presumably the position within the embedding vector — TODO confirm).
pub value_index: usize,
/// The contribution value reported by the core crate for this feature.
pub feature_contribution_value: f32,
}
/// Copies every field from the core word embedding feature contribution.
impl From<tangram_core::predict::WordEmbeddingFeatureContribution>
    for WordEmbeddingFeatureContribution
{
    fn from(value: tangram_core::predict::WordEmbeddingFeatureContribution) -> Self {
        let value_index = value.value_index;
        let feature_contribution_value = value.feature_contribution_value;
        Self {
            column_name: value.column_name,
            value_index,
            feature_contribution_value,
        }
    }
}
/// Arguments describing a prediction to be logged.
#[derive(Debug)]
pub struct LogPredictionArgs<Input, Output>
where
Input: Into<PredictInput>,
Output: From<PredictOutput> + Into<PredictOutput>,
{
/// Identifier recorded on the event (presumably used to pair the prediction
/// with a later true value — TODO confirm).
pub identifier: NumberOrString,
/// The input the prediction was made with.
pub input: Input,
/// The options the prediction was made with, if any.
pub options: Option<PredictOptions>,
/// The output the prediction produced.
pub output: Output,
}
/// Arguments describing a true value to be logged.
#[derive(Debug)]
pub struct LogTrueValueArgs {
/// Identifier recorded on the event (presumably the identifier of the
/// prediction this true value belongs to — TODO confirm).
pub identifier: NumberOrString,
/// The true value to record.
pub true_value: NumberOrString,
}
/// An event sent to the tracking endpoint.
///
/// Serialized internally tagged: a `type` field carries `"prediction"` or `"true_value"`.
#[derive(serde::Serialize)]
#[serde(tag = "type")]
enum Event {
/// A logged prediction.
#[serde(rename = "prediction")]
Prediction(PredictionEvent),
/// A logged true value.
#[serde(rename = "true_value")]
TrueValue(TrueValueEvent),
}
/// Payload of a prediction event.
#[derive(Debug, serde::Serialize)]
struct PredictionEvent {
/// UTC time at which the event was created.
date: chrono::DateTime<chrono::Utc>,
/// Caller-supplied identifier for the prediction.
identifier: NumberOrString,
/// The prediction input.
input: PredictInput,
/// The prediction options, if any were given.
options: Option<PredictOptions>,
/// The prediction output.
output: PredictOutput,
/// Id of the model that produced the prediction.
model_id: String,
}
/// Payload of a true value event.
#[derive(Debug, serde::Serialize)]
struct TrueValueEvent {
/// UTC time at which the event was created.
date: chrono::DateTime<chrono::Utc>,
/// Caller-supplied identifier.
identifier: NumberOrString,
/// Id of the model the event is logged for.
model_id: String,
/// The true value being recorded.
true_value: NumberOrString,
}
/// A value that is either a number or a string.
///
/// The `untagged` attribute makes it serialize as the bare number or string.
#[derive(Debug, serde::Serialize)]
#[serde(untagged)]
pub enum NumberOrString {
/// A numeric value.
Number(f64),
/// A textual value.
String(String),
}
impl From<f64> for NumberOrString {
fn from(value: f64) -> Self {
NumberOrString::Number(value)
}
}
impl From<String> for NumberOrString {
fn from(value: String) -> Self {
NumberOrString::String(value)
}
}
impl From<&str> for NumberOrString {
fn from(value: &str) -> Self {
NumberOrString::String(value.to_owned())
}
}
impl<Input, Output> Model<Input, Output>
where
    Input: Into<PredictInput>,
    Output: From<PredictOutput> + Into<PredictOutput>,
{
    /// Loads a model from the file at `path`.
    ///
    /// The file is memory mapped and its bytes are passed to [`Model::from_bytes`].
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or mapped, or if
    /// [`Model::from_bytes`] fails.
    pub fn from_path(
        path: impl AsRef<Path>,
        options: Option<LoadModelOptions>,
    ) -> Result<Model<Input, Output>> {
        let file = std::fs::File::open(path)?;
        // SAFETY: a memory map is only sound while the underlying file is not
        // truncated or modified. The map is used read-only and dropped at the
        // end of this call.
        // NOTE(review): nothing here prevents another process from changing
        // the file while it is mapped.
        let bytes = unsafe { Mmap::map(&file)? };
        Model::from_bytes(&bytes, options)
    }

    /// Loads a model from its serialized bytes.
    ///
    /// When `options` is `None` or carries no `tangram_url`, the logging base
    /// URL defaults to `https://app.tangram.dev`.
    ///
    /// # Errors
    ///
    /// Returns an error if the bytes cannot be read as a model.
    pub fn from_bytes(
        bytes: &[u8],
        options: Option<LoadModelOptions>,
    ) -> Result<Model<Input, Output>> {
        let model = tangram_model::from_bytes(bytes)?;
        let model = tangram_core::predict::Model::from(model);
        let tangram_url = options
            .and_then(|options| options.tangram_url)
            .unwrap_or_else(|| {
                "https://app.tangram.dev"
                    .parse()
                    .expect("the default tangram url is a valid url")
            });
        Ok(Model {
            model,
            log_queue: Vec::new(),
            tangram_url,
            input_marker: PhantomData,
            output_marker: PhantomData,
        })
    }

    /// Returns the id of the loaded model.
    pub fn id(&self) -> &str {
        self.model.id.as_str()
    }

    /// Makes a prediction for a single input.
    ///
    /// When `options` is `None`, the core crate's default options are used.
    ///
    /// # Panics
    ///
    /// Panics if the core crate returns no output for the input, or if the
    /// output cannot be converted into `Output` (the typed output conversions
    /// panic on a mismatched task kind).
    pub fn predict_one(&self, input: Input, options: Option<PredictOptions>) -> Output {
        let model = &self.model;
        let options = options.map(Into::into).unwrap_or_default();
        let output = tangram_core::predict::predict(model, &[input.into().into()], &options);
        let output: PredictOutput = output
            .into_iter()
            .next()
            .expect("predict returns one output per input")
            .into();
        output.into()
    }

    /// Makes a prediction for every input, returning the outputs in the order
    /// the core crate produced them.
    ///
    /// When `options` is `None`, the core crate's default options are used.
    pub fn predict(&self, input: Vec<Input>, options: Option<PredictOptions>) -> Vec<Output> {
        let model = &self.model;
        let options = options.map(Into::into).unwrap_or_default();
        let input = input
            .into_iter()
            .map(Into::into)
            .map(Into::into)
            .collect::<Vec<_>>();
        let output = tangram_core::predict::predict(model, &input, &options);
        output
            .into_iter()
            .map(|output| -> PredictOutput { output.into() })
            .map(Into::into)
            .collect()
    }

    /// Sends a prediction event immediately (blocking).
    ///
    /// # Errors
    ///
    /// Returns an error if the event cannot be serialized or the request fails.
    #[cfg(not(feature = "tokio"))]
    pub fn log_prediction(&mut self, args: LogPredictionArgs<Input, Output>) -> Result<()> {
        let event = Event::Prediction(self.prediction_event(args));
        self.log_event(event)?;
        Ok(())
    }

    /// Sends a prediction event immediately (async).
    ///
    /// # Errors
    ///
    /// Returns an error if the event cannot be serialized or the request fails.
    #[cfg(feature = "tokio")]
    pub async fn log_prediction(&mut self, args: LogPredictionArgs<Input, Output>) -> Result<()> {
        let event = Event::Prediction(self.prediction_event(args));
        self.log_event(event).await?;
        Ok(())
    }

    /// Sends a true value event immediately (blocking).
    ///
    /// # Errors
    ///
    /// Returns an error if the event cannot be serialized or the request fails.
    #[cfg(not(feature = "tokio"))]
    pub fn log_true_value(&mut self, args: LogTrueValueArgs) -> Result<()> {
        let event = Event::TrueValue(self.true_value_event(args));
        self.log_event(event)?;
        Ok(())
    }

    /// Sends a true value event immediately (async).
    ///
    /// # Errors
    ///
    /// Returns an error if the event cannot be serialized or the request fails.
    #[cfg(feature = "tokio")]
    pub async fn log_true_value(&mut self, args: LogTrueValueArgs) -> Result<()> {
        let event = Event::TrueValue(self.true_value_event(args));
        self.log_event(event).await?;
        Ok(())
    }

    /// Queues a prediction event; nothing is sent until `flush_log_queue` is called.
    pub fn enqueue_log_prediction(&mut self, args: LogPredictionArgs<Input, Output>) {
        let event = Event::Prediction(self.prediction_event(args));
        self.log_queue.push(event);
    }

    /// Queues a true value event; nothing is sent until `flush_log_queue` is called.
    pub fn enqueue_log_true_value(&mut self, args: LogTrueValueArgs) {
        let event = Event::TrueValue(self.true_value_event(args));
        self.log_queue.push(event);
    }

    /// Sends all queued events in one request (blocking) and empties the queue.
    ///
    /// The queue is emptied before sending, so events are not re-queued if the
    /// request fails.
    ///
    /// # Errors
    ///
    /// Returns an error if the events cannot be serialized or the request fails.
    #[cfg(not(feature = "tokio"))]
    pub fn flush_log_queue(&mut self) -> Result<()> {
        let events = std::mem::take(&mut self.log_queue);
        self.log_events(events)
    }

    /// Sends all queued events in one request (async) and empties the queue.
    ///
    /// The queue is emptied before sending, so events are not re-queued if the
    /// request fails.
    ///
    /// # Errors
    ///
    /// Returns an error if the events cannot be serialized or the request fails.
    #[cfg(feature = "tokio")]
    pub async fn flush_log_queue(&mut self) -> Result<()> {
        let events = std::mem::take(&mut self.log_queue);
        // `log_events` is async here; its future must be awaited to run at all.
        self.log_events(events).await
    }

    /// Sends a single event (blocking).
    #[cfg(not(feature = "tokio"))]
    fn log_event(&mut self, event: Event) -> Result<()> {
        self.log_events(vec![event])
    }

    /// Sends a single event (async).
    #[cfg(feature = "tokio")]
    async fn log_event(&mut self, event: Event) -> Result<()> {
        // Must be async: callers `.await` it, and `log_events` returns a future.
        self.log_events(vec![event]).await
    }

    /// Posts `events` as a JSON array to the `/track` path of the base URL (blocking).
    ///
    /// NOTE(review): the HTTP response is discarded, so a non-success status
    /// is not reported as an error.
    #[cfg(not(feature = "tokio"))]
    fn log_events(&mut self, events: Vec<Event>) -> Result<()> {
        let mut url = self.tangram_url.clone();
        url.set_path("/track");
        let body = serde_json::to_vec(&events)?;
        reqwest::blocking::Client::new()
            .post(url)
            .body(body)
            .send()?;
        Ok(())
    }

    /// Posts `events` as a JSON array to the `/track` path of the base URL (async).
    ///
    /// NOTE(review): the HTTP response is discarded, so a non-success status
    /// is not reported as an error.
    #[cfg(feature = "tokio")]
    async fn log_events(&mut self, events: Vec<Event>) -> Result<()> {
        let mut url = self.tangram_url.clone();
        url.set_path("/track");
        let body = serde_json::to_vec(&events)?;
        reqwest::Client::new().post(url).body(body).send().await?;
        Ok(())
    }

    /// Builds a prediction event stamped with the current UTC time and this model's id.
    fn prediction_event(&self, args: LogPredictionArgs<Input, Output>) -> PredictionEvent {
        PredictionEvent {
            date: chrono::Utc::now(),
            identifier: args.identifier,
            input: args.input.into(),
            options: args.options,
            output: args.output.into(),
            model_id: self.id().to_owned(),
        }
    }

    /// Builds a true value event stamped with the current UTC time and this model's id.
    fn true_value_event(&self, args: LogTrueValueArgs) -> TrueValueEvent {
        TrueValueEvent {
            date: chrono::Utc::now(),
            identifier: args.identifier,
            model_id: self.id().to_owned(),
            true_value: args.true_value,
        }
    }
}