#[macro_use]
extern crate failure;
use std::ffi::CStr;
use std::ffi::CString;
use std::mem;
use std::ptr;
use failure::Error;
pub use ffi::FeatureNode;
use util::*;
mod ffi;
pub mod util;
/// Errors raised while validating liblinear hyper-parameters.
#[derive(Debug, Fail)]
pub enum ParameterError {
    /// One or more parameter values were rejected, either by this crate's own
    /// checks or by liblinear's `check_parameter`.
    #[fail(display = "parameter error: {}", e)]
    InvalidParameters {
        #[doc(hidden)]
        e: String,
    },
}
/// Errors raised while assembling a training problem.
#[derive(Debug, Fail)]
pub enum ProblemError {
    /// The training data was missing or malformed.
    #[fail(display = "input data error: {}", e)]
    InvalidTrainingData {
        #[doc(hidden)]
        e: String,
    },
}
/// Errors raised while training, predicting with, or (de)serializing a model.
#[derive(Debug, Fail)]
pub enum ModelError {
    /// The model is not in a usable state for the requested operation
    /// (e.g. cross validation on a model without a bound problem).
    #[fail(display = "invalid state: {}", e)]
    InvalidState {
        #[doc(hidden)]
        e: String,
    },
    /// Saving or loading the model to/from disk failed.
    #[fail(display = "serialization error: {}", e)]
    SerializationError {
        #[doc(hidden)]
        e: String,
    },
    /// Prediction failed (malformed input or unsupported solver).
    #[fail(display = "prediction error: {}", e)]
    PredictionError {
        #[doc(hidden)]
        e: String,
    },
    /// A caller-supplied argument was out of range.
    #[fail(display = "illegal argument: {}", e)]
    IllegalArgument {
        #[doc(hidden)]
        e: String,
    },
    /// An unspecified failure inside liblinear.
    #[fail(display = "unknown error: {}", e)]
    UnknownError {
        #[doc(hidden)]
        e: String,
    },
}
/// A bound liblinear training problem: feature rows, target values, and bias.
pub trait LibLinearProblem: Clone {
    /// The transformed (liblinear-layout) feature rows, one per training instance.
    fn source_features(&self) -> &[Vec<FeatureNode>];
    /// The target value (class label or regression target) of each instance.
    fn target_values(&self) -> &[f64];
    /// The bias term; negative means no bias is used.
    fn bias(&self) -> f64;
}
/// Owns the label/feature buffers that the raw `ffi::Problem` points into,
/// keeping those pointers valid for the struct's lifetime.
#[doc(hidden)]
pub struct Problem {
    // Target values; `bound.y` points into this buffer.
    backing_store_labels: Vec<f64>,
    // Transformed feature rows; each row ends with the index=-1 sentinel node.
    backing_store_features: Vec<Vec<FeatureNode>>,
    // Row pointers handed to liblinear via `bound.x`; kept alive, never read from Rust.
    _backing_store_feature_ptrs: Vec<*const FeatureNode>,
    bound: ffi::Problem,
}
impl Problem {
    /// Transforms the training data into liblinear's sparse row layout and
    /// binds it to a raw `ffi::Problem`.
    ///
    /// Each row gets an optional bias node (when `bias >= 0`) followed by the
    /// mandatory index=-1 sentinel node. The raw struct stores pointers into
    /// the backing `Vec`s; moving a `Vec` does not move its heap buffer, so
    /// the pointers remain valid for the lifetime of the returned `Problem`.
    fn new(input_data: TrainingInput, bias: f64) -> Result<Problem, ParameterError> {
        let num_training_instances = input_data.len_data() as i32;
        let num_features = input_data.len_features() as i32;
        let has_bias = bias >= 0f64;
        let last_feature_index = input_data.last_feature_index() as i32;

        // Build the labels and the fully-transformed feature rows in a single
        // pass (the original code built the rows and then re-walked them to
        // append the bias/sentinel nodes).
        let (transformed_features, labels): (Vec<Vec<FeatureNode>>, Vec<f64>) = input_data
            .yield_data()
            .iter()
            .map(|instance| {
                let mut row: Vec<FeatureNode> = instance
                    .features()
                    .iter()
                    .map(|(index, value)| FeatureNode {
                        index: *index as i32,
                        value: *value,
                    })
                    .collect();
                if has_bias {
                    // Bias is modeled as an extra feature with a fixed value.
                    row.push(FeatureNode {
                        index: last_feature_index + 1,
                        value: bias,
                    });
                }
                // Sentinel node terminating the sparse vector, required by liblinear.
                row.push(FeatureNode {
                    index: -1,
                    value: 0f64,
                });
                (row, instance.label())
            })
            .unzip();

        // Row pointers must be taken only after the rows are in their final form.
        let transformed_feature_ptrs: Vec<*const FeatureNode> =
            transformed_features.iter().map(|e| e.as_ptr()).collect();

        Ok(Problem {
            bound: ffi::Problem {
                l: num_training_instances,
                // The bias node, when present, counts as one extra feature.
                n: num_features + if has_bias { 1 } else { 0 },
                y: labels.as_ptr(),
                x: transformed_feature_ptrs.as_ptr(),
                bias,
            },
            backing_store_labels: labels,
            backing_store_features: transformed_features,
            _backing_store_feature_ptrs: transformed_feature_ptrs,
        })
    }
}
impl LibLinearProblem for Problem {
    /// Borrows the transformed feature matrix (bias/sentinel nodes included).
    fn source_features(&self) -> &[Vec<FeatureNode>] {
        &self.backing_store_features
    }
    /// Borrows the per-instance target values.
    fn target_values(&self) -> &[f64] {
        &self.backing_store_labels
    }
    /// The bias passed at construction; negative means disabled.
    fn bias(&self) -> f64 {
        self.bound.bias
    }
}
impl Clone for Problem {
    /// Manual clone: the raw `ffi::Problem` holds raw pointers, so the clone
    /// must repoint `y`/`x` at its own copied buffers instead of aliasing
    /// `self`'s memory (a derived clone would copy the pointers verbatim).
    fn clone(&self) -> Self {
        let labels = self.backing_store_labels.clone();
        let transformed_features: Vec<Vec<FeatureNode>> = self.backing_store_features.clone();
        // Row pointers must be regenerated from the cloned rows.
        let transformed_feature_ptrs: Vec<*const FeatureNode> =
            transformed_features.iter().map(|e| e.as_ptr()).collect();
        Problem {
            bound: ffi::Problem {
                l: self.bound.l,
                n: self.bound.n,
                y: labels.as_ptr(),
                x: transformed_feature_ptrs.as_ptr(),
                bias: self.bound.bias,
            },
            backing_store_labels: labels,
            backing_store_features: transformed_features,
            _backing_store_feature_ptrs: transformed_feature_ptrs,
        }
    }
}
/// Builder for [`Problem`].
pub struct ProblemBuilder {
    // Training data; `build` fails if this was never supplied.
    input_data: Option<TrainingInput>,
    // Bias term; negative (the default, -1.0) means no bias node is appended.
    bias: f64,
}
impl ProblemBuilder {
    /// Creates a builder with no input data and bias disabled (-1.0).
    fn new() -> ProblemBuilder {
        ProblemBuilder {
            input_data: None,
            bias: -1.0,
        }
    }

    /// Sets the training data. Required before `build`.
    pub fn input_data(&mut self, input_data: TrainingInput) -> &mut Self {
        self.input_data = Some(input_data);
        self
    }

    /// Sets the bias term; negative values (the default) disable the bias node.
    pub fn bias(&mut self, bias: f64) -> &mut Self {
        self.bias = bias;
        self
    }

    /// Consumes the builder and constructs the bound problem.
    ///
    /// Errors when no input data was supplied or when `Problem::new` rejects it.
    fn build(self) -> Result<Problem, Error> {
        // `ok_or_else` so the error string is only allocated on the failure path
        // (plain `ok_or` would build it eagerly on every call).
        let input_data = self
            .input_data
            .ok_or_else(|| ProblemError::InvalidTrainingData {
                e: "Missing input/training data".to_string(),
            })?;
        Ok(Problem::new(input_data, self.bias)?)
    }
}
/// The liblinear solver variants; discriminants match the C API constants,
/// which is why 8-10 are skipped.
#[allow(non_camel_case_types)]
pub enum SolverType {
    /// L2-regularized logistic regression (primal).
    L2R_LR = 0,
    /// L2-regularized L2-loss SVM (dual).
    L2R_L2LOSS_SVC_DUAL = 1,
    /// L2-regularized L2-loss SVM (primal).
    L2R_L2LOSS_SVC = 2,
    /// L2-regularized L1-loss SVM (dual).
    L2R_L1LOSS_SVC_DUAL = 3,
    /// Crammer & Singer multi-class SVM.
    MCSVM_CS = 4,
    /// L1-regularized L2-loss SVM.
    L1R_L2LOSS_SVC = 5,
    /// L1-regularized logistic regression.
    L1R_LR = 6,
    /// L2-regularized logistic regression (dual).
    L2R_LR_DUAL = 7,
    /// L2-regularized L2-loss SVR (primal).
    L2R_L2LOSS_SVR = 11,
    /// L2-regularized L2-loss SVR (dual).
    L2R_L2LOSS_SVR_DUAL = 12,
    /// L2-regularized L1-loss SVR (dual).
    L2R_L1LOSS_SVR_DUAL = 13,
}
impl SolverType {
    /// True for the logistic-regression solvers — the only ones that support
    /// probability estimates.
    pub fn is_logistic_regression(&self) -> bool {
        match self {
            SolverType::L2R_LR => true,
            SolverType::L1R_LR => true,
            SolverType::L2R_LR_DUAL => true,
            _ => false,
        }
    }

    /// True for the support-vector-regression solvers.
    pub fn is_support_vector_regression(&self) -> bool {
        match self {
            SolverType::L2R_L2LOSS_SVR => true,
            SolverType::L2R_L2LOSS_SVR_DUAL => true,
            SolverType::L2R_L1LOSS_SVR_DUAL => true,
            _ => false,
        }
    }

    /// True for every solver that is not a regression solver.
    pub fn is_multi_class_classification(&self) -> bool {
        !self.is_support_vector_regression()
    }
}
impl Default for SolverType {
    /// The default solver: `L2R_LR` (L2-regularized logistic regression).
    fn default() -> SolverType {
        SolverType::L2R_LR
    }
}
/// Read-only view over the liblinear hyper-parameters.
pub trait LibLinearParameter: Clone {
    /// The solver used for training.
    fn solver_type(&self) -> SolverType;
    /// Stopping tolerance (`eps`).
    fn stopping_criterion(&self) -> f64;
    /// Constraint-violation cost (`C`).
    fn constraints_violation_cost(&self) -> f64;
    /// Loss sensitivity (`p`), used by the SVR solvers.
    fn regression_loss_sensitivity(&self) -> f64;
}
/// Owns the weight/label/initial-solution buffers that the raw
/// `ffi::Parameter` points into, keeping those pointers valid.
#[doc(hidden)]
pub struct Parameter {
    // `bound.weight` points into this buffer (NULL when empty).
    backing_store_class_cost_penalty_weights: Vec<f64>,
    // `bound.weight_label` points into this buffer (NULL when empty).
    backing_store_class_cost_penalty_labels: Vec<i32>,
    // `bound.init_sol` points into this buffer (NULL when empty).
    backing_store_starting_solutions: Vec<f64>,
    bound: ffi::Parameter,
}
impl Parameter {
    /// Validates the given hyper-parameters with liblinear's `check_parameter`
    /// and bundles them (plus their backing storage) into a `Parameter`.
    ///
    /// Returns `ParameterError::InvalidParameters` when the weight/label lists
    /// differ in length or when liblinear rejects the parameter combination.
    fn new(
        solver: SolverType,
        eps: f64,
        cost: f64,
        p: f64,
        cost_penalty_weights: Vec<f64>,
        cost_penalty_labels: Vec<i32>,
        init_solutions: Vec<f64>,
    ) -> Result<Parameter, ParameterError> {
        if cost_penalty_weights.len() != cost_penalty_labels.len() {
            return Err(ParameterError::InvalidParameters {
                e: "Mismatch between cost penalty weights and labels".to_string(),
            });
        }
        let num_weights = cost_penalty_weights.len() as i32;
        // The raw struct stores pointers into the Vecs below; moving a Vec does
        // not move its heap buffer, so the pointers stay valid for the lifetime
        // of the backing stores. Empty lists are passed as NULL.
        let param = Parameter {
            bound: ffi::Parameter {
                solver_type: solver as i32,
                eps,
                C: cost,
                nr_weight: num_weights,
                weight_label: if cost_penalty_labels.is_empty() {
                    ptr::null()
                } else {
                    cost_penalty_labels.as_ptr()
                },
                weight: if cost_penalty_weights.is_empty() {
                    ptr::null()
                } else {
                    cost_penalty_weights.as_ptr()
                },
                p,
                init_sol: if init_solutions.is_empty() {
                    ptr::null()
                } else {
                    init_solutions.as_ptr()
                },
            },
            backing_store_class_cost_penalty_weights: cost_penalty_weights,
            backing_store_class_cost_penalty_labels: cost_penalty_labels,
            backing_store_starting_solutions: init_solutions,
        };
        unsafe {
            // A NULL problem is passed: only the parameter fields are checked.
            // NOTE(review): the returned message is assumed to be a static
            // string owned by liblinear, so it is not freed here — confirm.
            let param_error = ffi::check_parameter(ptr::null(), &param.bound);
            if !param_error.is_null() {
                return Err(ParameterError::InvalidParameters {
                    // `into_owned()` replaces the redundant
                    // `.to_owned().to_string()` chain.
                    e: CStr::from_ptr(param_error).to_string_lossy().into_owned(),
                });
            }
        }
        Ok(param)
    }
}
impl LibLinearParameter for Parameter {
fn solver_type(&self) -> SolverType {
unsafe { mem::transmute(self.bound.solver_type as i8) }
}
fn stopping_criterion(&self) -> f64 {
self.bound.eps
}
fn constraints_violation_cost(&self) -> f64 {
self.bound.C
}
fn regression_loss_sensitivity(&self) -> f64 {
self.bound.p
}
}
impl Clone for Parameter {
fn clone(&self) -> Self {
let weights = self.backing_store_class_cost_penalty_weights.clone();
let weight_labels = self.backing_store_class_cost_penalty_labels.clone();
let init_sol = self.backing_store_starting_solutions.clone();
Parameter {
bound: ffi::Parameter {
solver_type: self.bound.solver_type as i32,
eps: self.bound.eps,
C: self.bound.C,
nr_weight: self.bound.nr_weight,
weight_label: weight_labels.as_ptr(),
weight: weights.as_ptr(),
p: self.bound.p,
init_sol: init_sol.as_ptr(),
},
backing_store_class_cost_penalty_weights: weights,
backing_store_class_cost_penalty_labels: weight_labels,
backing_store_starting_solutions: init_sol,
}
}
}
/// Builder for [`Parameter`] with defaults: solver = L2R_LR, eps = 0.01,
/// C = 1.0, p = 0.1, and no class weights or warm-start solution.
pub struct ParameterBuilder {
    solver_type: SolverType,
    epsilon: f64,
    cost: f64,
    p: f64,
    // Per-class cost penalty weights; must pair 1:1 with `cost_penalty_labels`.
    cost_penalty_weights: Vec<f64>,
    cost_penalty_labels: Vec<i32>,
    // Optional warm-start solution; empty means none is supplied to liblinear.
    init_solutions: Vec<f64>,
}
impl ParameterBuilder {
    /// Creates a builder preloaded with the default hyper-parameters.
    fn new() -> ParameterBuilder {
        ParameterBuilder {
            solver_type: SolverType::default(),
            epsilon: 0.01,
            cost: 1.0,
            p: 0.1,
            cost_penalty_weights: Vec::new(),
            cost_penalty_labels: Vec::new(),
            init_solutions: Vec::new(),
        }
    }

    /// Selects the solver used for training.
    pub fn solver_type(&mut self, solver_type: SolverType) -> &mut Self {
        self.solver_type = solver_type;
        self
    }

    /// Sets the stopping tolerance (`eps`).
    pub fn stopping_criterion(&mut self, epsilon: f64) -> &mut Self {
        self.epsilon = epsilon;
        self
    }

    /// Sets the constraint-violation cost (`C`).
    pub fn constraints_violation_cost(&mut self, cost: f64) -> &mut Self {
        self.cost = cost;
        self
    }

    /// Sets the loss sensitivity (`p`) used by the SVR solvers.
    pub fn regression_loss_sensitivity(&mut self, p: f64) -> &mut Self {
        self.p = p;
        self
    }

    /// Sets per-class cost penalty weights (paired with the labels list).
    pub fn cost_penalty_weights(&mut self, cost_penalty_weights: Vec<f64>) -> &mut Self {
        self.cost_penalty_weights = cost_penalty_weights;
        self
    }

    /// Sets the class labels the penalty weights apply to.
    pub fn cost_penalty_labels(&mut self, cost_penalty_labels: Vec<i32>) -> &mut Self {
        self.cost_penalty_labels = cost_penalty_labels;
        self
    }

    /// Supplies an initial solution to warm-start the solver.
    pub fn initial_solutions(&mut self, init_solutions: Vec<f64>) -> &mut Self {
        self.init_solutions = init_solutions;
        self
    }

    /// Consumes the builder and validates the settings via `Parameter::new`.
    fn build(self) -> Result<Parameter, Error> {
        let ParameterBuilder {
            solver_type,
            epsilon,
            cost,
            p,
            cost_penalty_weights,
            cost_penalty_labels,
            init_solutions,
        } = self;
        let parameter = Parameter::new(
            solver_type,
            epsilon,
            cost,
            p,
            cost_penalty_weights,
            cost_penalty_labels,
            init_solutions,
        )?;
        Ok(parameter)
    }
}
/// Implemented by types that may carry a bound [`LibLinearProblem`].
pub trait HasLibLinearProblem {
    type Output: LibLinearProblem;
    /// The bound problem, if any.
    fn problem(&self) -> Option<&Self::Output>;
}
/// Implemented by types that carry a set of [`LibLinearParameter`]s.
pub trait HasLibLinearParameter {
    type Output: LibLinearParameter;
    /// The bound hyper-parameters.
    fn parameter(&self) -> &Self::Output;
}
/// A trained model that can run predictions and be saved to disk.
pub trait LibLinearModel: HasLibLinearProblem + HasLibLinearParameter {
    /// Predicts the class label / regression target for one instance.
    fn predict(&self, features: PredictionInput) -> Result<f64, ModelError>;
    /// Returns the per-class decision values and the predicted label.
    fn predict_values(&self, features: PredictionInput) -> Result<(Vec<f64>, f64), ModelError>;
    /// Returns per-class probability estimates and the predicted label.
    /// Only meaningful for logistic-regression solvers.
    fn predict_probabilities(
        &self,
        features: PredictionInput,
    ) -> Result<(Vec<f64>, f64), ModelError>;
    /// Coefficient of a feature in the decision function for a given label.
    fn feature_coefficient(&self, feature_index: i32, label_index: i32) -> f64;
    /// Bias term of the decision function for a given label.
    fn label_bias(&self, label_index: i32) -> f64;
    /// The model's bias; negative means no bias was used.
    fn bias(&self) -> f64;
    /// The class labels known to the model.
    fn labels(&self) -> &Vec<i32>;
    /// Number of classes. NOTE(review): liblinear reports 2 for regression
    /// models — confirm against the linked liblinear version.
    fn num_classes(&self) -> usize;
    /// Number of input features (excluding any bias node).
    fn num_features(&self) -> usize;
    /// Serializes the model to the given file path.
    fn save_to_disk(&self, file_path: &str) -> Result<(), ModelError>;
}
/// Cross-validation over a bound problem, without keeping a trained model.
pub trait LibLinearCrossValidator: HasLibLinearProblem + HasLibLinearParameter {
    /// Runs k-fold cross validation and returns the predicted value for each
    /// training instance. `folds` must be >= 2.
    fn cross_validation(&self, folds: i32) -> Result<Vec<f64>, ModelError>;
    /// Searches for the best cost (and loss sensitivity) starting from the
    /// given values; returns (best cost, best rate, best loss sensitivity).
    fn find_optimal_constraints_violation_cost_and_loss_sensitivity(
        &self,
        folds: i32,
        start_cost: f64,
        start_loss_sensitivity: f64,
    ) -> Result<(f64, f64, f64), ModelError>;
}
/// Wraps a raw liblinear model together with the Rust-owned data it borrows.
#[doc(hidden)]
struct Model {
    // Present when built from training data; None for deserialized models.
    problem: Option<Problem>,
    parameter: Parameter,
    // Copy of the model's label array, owned on the Rust side.
    backing_store_labels: Vec<i32>,
    // Raw liblinear model; NULL when constructed for cross validation only.
    bound: *mut ffi::Model,
}
impl Model {
    /// Builds a model from a problem/parameter pair. When `train` is true the
    /// model is trained immediately via liblinear; otherwise `bound` stays
    /// NULL, as required by the cross-validation-only entry points.
    fn from_input(
        problem: Problem,
        parameter: Parameter,
        train: bool,
    ) -> Result<Model, ModelError> {
        let mut bound: *mut ffi::Model = ptr::null_mut();
        if train {
            // The trained model borrows the problem/parameter storage; both
            // are kept alive by the returned `Model`.
            bound = unsafe { ffi::train(&problem.bound, &parameter.bound) };
            if bound.is_null() {
                return Err(ModelError::UnknownError {
                    e: "train() returned a NULL pointer".to_string(),
                });
            }
        }
        let mut backing_store_labels = Vec::<i32>::new();
        unsafe {
            if train {
                // Copy the label list out of the C struct so callers can
                // borrow it safely without touching the raw pointer.
                for i in 0..(*bound).nr_class {
                    backing_store_labels.push(*(*bound).label.offset(i as isize));
                }
            }
        }
        Ok(Model {
            problem: Some(problem),
            parameter,
            backing_store_labels,
            bound,
        })
    }

    /// Loads a previously saved model from disk via liblinear's `load_model`.
    fn from_serialized_file(path_to_serialized_model: &str) -> Result<Model, ModelError> {
        unsafe {
            // Reject paths with interior NUL bytes instead of panicking.
            let file_path_cstr = CString::new(path_to_serialized_model).map_err(|_| {
                ModelError::IllegalArgument {
                    e: "file path contains an interior NUL byte".to_string(),
                }
            })?;
            let bound = ffi::load_model(file_path_cstr.as_ptr());
            if bound.is_null() {
                return Err(ModelError::SerializationError {
                    e: "load_model() returned a NULL pointer".to_string(),
                });
            }
            let mut backing_store_labels = Vec::<i32>::new();
            for i in 0..(*bound).nr_class {
                backing_store_labels.push(*(*bound).label.offset(i as isize));
            }
            Ok(Model {
                problem: None,
                // Serialized models only persist the solver type; the other
                // hyper-parameters are reset to the builder defaults.
                parameter: Parameter::new(
                    // NOTE(review): the transmute assumes the stored solver id
                    // is a valid discriminant and that `SolverType` is 1 byte
                    // — confirm; an out-of-range value here is UB.
                    mem::transmute((*bound).param.solver_type as i8),
                    0.01,
                    1.0,
                    0.1,
                    Vec::new(),
                    Vec::new(),
                    Vec::new(),
                )
                .unwrap(),
                backing_store_labels,
                bound,
            })
        }
    }

    /// Converts prediction input into liblinear's feature-node layout:
    /// the raw features, an optional bias node, then the index=-1 sentinel.
    fn preprocess_prediction_input(
        &self,
        prediction_input: PredictionInput,
    ) -> Result<Vec<FeatureNode>, PredictionInputError> {
        // Callers must never reach this without a trained/loaded model.
        assert!(!self.bound.is_null());
        let last_feature_index = prediction_input.last_feature_index() as i32;
        if last_feature_index as usize != self.num_features() {
            return Err(PredictionInputError::DataError {
                e: format!(
                    "Expected {} features, found {} instead",
                    self.num_features(),
                    last_feature_index
                ),
            });
        }
        let bias = unsafe { (*self.bound).bias };
        let has_bias = bias >= 0f64;
        let mut data: Vec<FeatureNode> = prediction_input
            .yield_data()
            .iter()
            .map(|(index, value)| FeatureNode {
                index: *index as i32,
                value: *value,
            })
            .collect();
        if has_bias {
            data.push(FeatureNode {
                index: last_feature_index + 1,
                value: bias,
            });
        }
        // Sentinel node terminating the sparse feature vector.
        data.push(FeatureNode {
            index: -1,
            value: 0f64,
        });
        Ok(data)
    }
}
impl HasLibLinearProblem for Model {
    type Output = Problem;
    /// The bound problem; `None` when the model was loaded from disk.
    fn problem(&self) -> Option<&Self::Output> {
        self.problem.as_ref()
    }
}
impl HasLibLinearParameter for Model {
    type Output = Parameter;
    /// The hyper-parameters this model was built with.
    fn parameter(&self) -> &Self::Output {
        &self.parameter
    }
}
impl LibLinearModel for Model {
    /// Predicts the class label (or regression target) for one instance.
    fn predict(&self, features: PredictionInput) -> Result<f64, ModelError> {
        Ok(self.predict_values(features)?.1)
    }

    /// Returns the per-class decision values and the best-scoring class.
    fn predict_values(&self, features: PredictionInput) -> Result<(Vec<f64>, f64), ModelError> {
        let transformed_features = self
            .preprocess_prediction_input(features)
            .map_err(|err| ModelError::PredictionError { e: err.to_string() })?;
        unsafe {
            // liblinear yields a single decision value for binary problems.
            let mut output_values: Vec<f64> = match (*self.bound).nr_class {
                2 => vec![0f64; 1],
                l => vec![0f64; l as usize],
            };
            let best_class = ffi::predict_values(
                self.bound,
                transformed_features.as_ptr(),
                output_values.as_mut_ptr(),
            );
            Ok((output_values, best_class))
        }
    }

    /// Returns per-class probability estimates and the best-scoring class.
    /// Only supported by the logistic-regression solvers.
    fn predict_probabilities(
        &self,
        features: PredictionInput,
    ) -> Result<(Vec<f64>, f64), ModelError> {
        // Reject unsupported solvers before doing any preprocessing work.
        if !self.parameter.solver_type().is_logistic_regression() {
            return Err(ModelError::PredictionError {
                e: "Probability output is only supported for logistic regression".to_string(),
            });
        }
        let transformed_features = self
            .preprocess_prediction_input(features)
            .map_err(|err| ModelError::PredictionError { e: err.to_string() })?;
        unsafe {
            let mut output_probabilities = vec![0f64; (*self.bound).nr_class as usize];
            // Bug fix: call `predict_probability`; the previous code called
            // `predict_values`, which fills the buffer with raw decision
            // values, not probability estimates.
            let best_class = ffi::predict_probability(
                self.bound,
                transformed_features.as_ptr(),
                output_probabilities.as_mut_ptr(),
            );
            Ok((output_probabilities, best_class))
        }
    }

    /// Coefficient of a feature for a given label (wraps `get_decfun_coef`).
    fn feature_coefficient(&self, feature_index: i32, label_index: i32) -> f64 {
        unsafe { ffi::get_decfun_coef(self.bound, feature_index, label_index) }
    }

    /// Decision-function bias for a given label (wraps `get_decfun_bias`).
    fn label_bias(&self, label_index: i32) -> f64 {
        unsafe { ffi::get_decfun_bias(self.bound, label_index) }
    }

    /// The model's bias; negative means no bias was used.
    fn bias(&self) -> f64 {
        unsafe { (*self.bound).bias }
    }

    /// The class labels copied out of the raw model at construction.
    fn labels(&self) -> &Vec<i32> {
        &self.backing_store_labels
    }

    fn num_classes(&self) -> usize {
        unsafe { (*self.bound).nr_class as usize }
    }

    fn num_features(&self) -> usize {
        unsafe { (*self.bound).nr_feature as usize }
    }

    /// Serializes the model with liblinear's `save_model`.
    fn save_to_disk(&self, file_path: &str) -> Result<(), ModelError> {
        // Reject paths with interior NUL bytes instead of panicking.
        let file_path_cstr =
            CString::new(file_path).map_err(|_| ModelError::SerializationError {
                e: "file path contains an interior NUL byte".to_string(),
            })?;
        unsafe {
            let result = ffi::save_model(file_path_cstr.as_ptr(), self.bound);
            if result == -1 {
                return Err(ModelError::SerializationError {
                    e: "save_model() returned -1".to_string(),
                });
            }
        }
        Ok(())
    }
}
impl LibLinearCrossValidator for Model {
    /// Runs k-fold cross validation and returns the predicted value for every
    /// training instance.
    fn cross_validation(&self, folds: i32) -> Result<Vec<f64>, ModelError> {
        if folds < 2 {
            return Err(ModelError::IllegalArgument {
                e: "Number of folds must be >= 2 for cross validator".to_string(),
            });
        } else if self.problem.is_none() {
            return Err(ModelError::InvalidState {
                e: "Invalid problem/parameters for cross validator".to_string(),
            });
        }
        // Safe: the is_none() guard above ensures the problem is present.
        let problem = self.problem.as_ref().unwrap();
        unsafe {
            let mut output_labels = vec![0f64; problem.bound.l as usize];
            ffi::cross_validation(
                &problem.bound,
                &self.parameter.bound,
                folds,
                output_labels.as_mut_ptr(),
            );
            Ok(output_labels)
        }
    }

    /// Grid-searches for the best cost/loss-sensitivity starting from the
    /// given values; returns (best cost, best rate, best loss sensitivity).
    fn find_optimal_constraints_violation_cost_and_loss_sensitivity(
        &self,
        folds: i32,
        start_cost: f64,
        start_loss_sensitivity: f64,
    ) -> Result<(f64, f64, f64), ModelError> {
        if folds < 2 {
            return Err(ModelError::IllegalArgument {
                e: "Number of folds must be >= 2 for cross validator".to_string(),
            });
        } else if self.problem.is_none() {
            return Err(ModelError::InvalidState {
                e: "Invalid problem/parameters for cross validator".to_string(),
            });
        }
        // Safe: the is_none() guard above ensures the problem is present.
        let problem = self.problem.as_ref().unwrap();
        unsafe {
            let mut best_cost = 0f64;
            let mut best_rate = 0f64;
            let mut best_loss_sensitivity = 0f64;
            ffi::find_parameters(
                &problem.bound,
                &self.parameter.bound,
                folds,
                start_cost,
                start_loss_sensitivity,
                &mut best_cost,
                &mut best_loss_sensitivity,
                &mut best_rate,
            );
            Ok((best_cost, best_rate, best_loss_sensitivity))
        }
    }
}
impl Drop for Model {
    fn drop(&mut self) {
        unsafe {
            // `free_and_destroy_model` takes a pointer-to-pointer and nulls it
            // out; a temporary avoids handing out a `&mut` to our own field
            // mid-drop. NOTE(review): `bound` is NULL for cross-validator
            // instances — assumed liblinear tolerates a NULL model here;
            // confirm against the linked liblinear version.
            let mut temp = self.bound;
            ffi::free_and_destroy_model(&mut temp);
        }
    }
}
/// Entry point for configuring and building models / cross validators.
pub struct Builder {
    problem_builder: ProblemBuilder,
    parameter_builder: ParameterBuilder,
}
impl Builder {
    /// Creates a builder with default problem and parameter settings.
    pub fn new() -> Builder {
        let problem_builder = ProblemBuilder::new();
        let parameter_builder = ParameterBuilder::new();
        Builder {
            problem_builder,
            parameter_builder,
        }
    }

    /// Mutable access to the problem (input data / bias) settings.
    pub fn problem(&mut self) -> &mut ProblemBuilder {
        &mut self.problem_builder
    }

    /// Mutable access to the parameter (solver / hyper-parameter) settings.
    pub fn parameters(&mut self) -> &mut ParameterBuilder {
        &mut self.parameter_builder
    }

    /// Builds an untrained model usable only for cross validation.
    pub fn build_cross_validator(self) -> Result<impl LibLinearCrossValidator, Error> {
        let problem = self.problem_builder.build()?;
        let parameter = self.parameter_builder.build()?;
        let validator = Model::from_input(problem, parameter, false)?;
        Ok(validator)
    }

    /// Trains a model from the configured problem and parameters.
    pub fn build_model(self) -> Result<impl LibLinearModel, Error> {
        let problem = self.problem_builder.build()?;
        let parameter = self.parameter_builder.build()?;
        let model = Model::from_input(problem, parameter, true)?;
        Ok(model)
    }
}
/// Namespace for saving and loading models on disk.
pub struct Serializer;
impl Serializer {
    /// Deserializes a model previously written with [`Serializer::save_model`].
    pub fn load_model(path_to_serialized_model: &str) -> Result<impl LibLinearModel, Error> {
        let model = Model::from_serialized_file(path_to_serialized_model)?;
        Ok(model)
    }

    /// Writes the given model to disk at the given path.
    pub fn save_model(
        path_to_serialized_model: &str,
        model: &impl LibLinearModel,
    ) -> Result<(), Error> {
        model.save_to_disk(path_to_serialized_model)?;
        Ok(())
    }
}
/// Returns the version number of the linked liblinear library.
pub fn liblinear_version() -> i32 {
    // Reads an extern static exported by liblinear. NOTE(review): assumed to
    // be set once by the C library and never mutated — confirm.
    unsafe { ffi::liblinear_version }
}
/// Enables (`true`) or suppresses (`false`) liblinear's stdout logging.
pub fn toggle_liblinear_stdout_output(state: bool) {
    unsafe {
        if state {
            // Restore the library's default print function.
            ffi::set_print_string_function(None);
        } else {
            // Route all library output to a no-op sink.
            ffi::set_print_string_function(Some(ffi::silence_liblinear_stdout));
        }
    }
}