Struct forust_ml::gradientbooster::GradientBooster

source ·
pub struct GradientBooster {
Show 35 fields pub objective_type: ObjectiveType, pub iterations: usize, pub learning_rate: f32, pub max_depth: usize, pub max_leaves: usize, pub l1: f32, pub l2: f32, pub gamma: f32, pub max_delta_step: f32, pub min_leaf_weight: f32, pub base_score: f64, pub nbins: u16, pub parallel: bool, pub allow_missing_splits: bool, pub monotone_constraints: Option<ConstraintMap>, pub subsample: f32, pub top_rate: f64, pub other_rate: f64, pub colsample_bytree: f64, pub seed: u64, pub missing: f64, pub create_missing_branch: bool, pub sample_method: SampleMethod, pub grow_policy: GrowPolicy, pub evaluation_metric: Option<Metric>, pub early_stopping_rounds: Option<usize>, pub initialize_base_score: bool, pub terminate_missing_features: HashSet<usize>, pub evaluation_history: Option<RowMajorMatrix<f64>>, pub best_iteration: Option<usize>, pub prediction_iteration: Option<usize>, pub missing_node_treatment: MissingNodeTreatment, pub log_iterations: usize, pub force_children_to_bound_parent: bool, pub trees: Vec<Tree>, /* private fields */
}
Expand description

Gradient Booster object

Fields§

§objective_type: ObjectiveType

The name of objective function used to optimize. Valid options include “LogLoss” to use logistic loss as the objective function, or “SquaredLoss” to use Squared Error as the objective function.

§iterations: usize

Total number of trees to train in the ensemble.

§learning_rate: f32

Step size to use at each iteration. Each leaf weight is multiplied by this number. The smaller the value, the more conservative the weights will be.

§max_depth: usize

Maximum depth of an individual tree. Valid values are 0 to infinity.

§max_leaves: usize

Maximum number of leaves allowed on a tree. Valid values are 0 to infinity. This is the total number of final nodes.

§l1: f32

L1 regularization term applied to the weights of the tree. Valid values are 0 to infinity. 0 Means no regularization applied.

§l2: f32

L2 regularization term applied to the weights of the tree. Valid values are 0 to infinity.

§gamma: f32

The minimum amount of loss required to further split a node. Valid values are 0 to infinity.

§max_delta_step: f32

Maximum delta step allowed at each leaf. This is the maximum magnitude a leaf can take. Setting to 0 results in no constraint.

§min_leaf_weight: f32

Minimum sum of the hessian values of the loss function required to be in a node.

§base_score: f64

The initial prediction value of the model.

§nbins: u16

Number of bins to calculate to partition the data. Setting this to a smaller number will result in faster training time, while potentially sacrificing accuracy. If there are more bins than unique values in a column, all unique values will be used.

§parallel: bool

Should the algorithm be run in parallel?

§allow_missing_splits: bool

Should the algorithm allow splits that completely separate out missing and non-missing values, in the case where create_missing_branch is false. When create_missing_branch is true, setting this to true will result in the missing branch being further split.

§monotone_constraints: Option<ConstraintMap>

Constraints that are used to enforce a specific relationship between the training features and the target variable.

§subsample: f32

Percent of records to randomly sample at each iteration when training a tree.

§top_rate: f64

Used only in goss. The retain ratio of large gradient data.

§other_rate: f64

Used only in goss. The retain ratio of small gradient data.

§colsample_bytree: f64

Specify the fraction of columns that should be sampled at each iteration, valid values are in the range (0.0,1.0].

§seed: u64

Integer value used to seed any randomness used in the algorithm.

§missing: f64

Value to consider missing.

§create_missing_branch: bool

Should missing be split out into its own separate branch?

§sample_method: SampleMethod

Specify the method by which records should be sampled when training.

§grow_policy: GrowPolicy

Growth policy to use when training a tree, this is how the next node is selected.

§evaluation_metric: Option<Metric>

Define the evaluation metric to record at each iteration.

§early_stopping_rounds: Option<usize>

Number of rounds within which the evaluation metric value must improve in order to keep training.

§initialize_base_score: bool

If this is specified, the base_score will be calculated using the sample_weight and y data in accordance with the requested objective_type.

§terminate_missing_features: HashSet<usize>

A set of features for which the missing node will always be terminated, even if allow_missing_splits is set to true. This value is only valid if create_missing_branch is also True.

§evaluation_history: Option<RowMajorMatrix<f64>>

A matrix of the evaluation history on the evaluation datasets.

§best_iteration: Option<usize>

§prediction_iteration: Option<usize>

Number of trees to use when predicting, defaults to best_iteration if this is defined.

§missing_node_treatment: MissingNodeTreatment

How the missing nodes weights should be treated at training time.

§log_iterations: usize

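Controls whether the model logs output while training. Setting to a value (N) other than zero will result in information being logged about every N iterations.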

§force_children_to_bound_parent: bool

Should the children nodes contain the parent node in their bounds? Setting this to true will result in no children being created where the higher and lower child values are both greater than, or both less than, the parent weight.

§trees: Vec<Tree>

Implementations§

source§

impl GradientBooster

source

pub fn new( objective_type: ObjectiveType, iterations: usize, learning_rate: f32, max_depth: usize, max_leaves: usize, l1: f32, l2: f32, gamma: f32, max_delta_step: f32, min_leaf_weight: f32, base_score: f64, nbins: u16, parallel: bool, allow_missing_splits: bool, monotone_constraints: Option<ConstraintMap>, subsample: f32, top_rate: f64, other_rate: f64, colsample_bytree: f64, seed: u64, missing: f64, create_missing_branch: bool, sample_method: SampleMethod, grow_policy: GrowPolicy, evaluation_metric: Option<Metric>, early_stopping_rounds: Option<usize>, initialize_base_score: bool, terminate_missing_features: HashSet<usize>, missing_node_treatment: MissingNodeTreatment, log_iterations: usize, force_children_to_bound_parent: bool ) -> Result<Self, ForustError>

Gradient Booster object

  • objective_type - The name of objective function used to optimize. Valid options include “LogLoss” to use logistic loss as the objective function, or “SquaredLoss” to use Squared Error as the objective function.
  • iterations - Total number of trees to train in the ensemble.
  • learning_rate - Step size to use at each iteration. Each leaf weight is multiplied by this number. The smaller the value, the more conservative the weights will be.
  • max_depth - Maximum depth of an individual tree. Valid values are 0 to infinity.
  • max_leaves - Maximum number of leaves allowed on a tree. Valid values are 0 to infinity. This is the total number of final nodes.
  • l2 - L2 regularization term applied to the weights of the tree. Valid values are 0 to infinity.
  • gamma - The minimum amount of loss required to further split a node. Valid values are 0 to infinity.
  • min_leaf_weight - Minimum sum of the hessian values of the loss function required to be in a node.
  • base_score - The initial prediction value of the model. If set to None the parameter initialize_base_score will automatically be set to true, in which case the base score will be chosen based on the objective function at fit time.
  • nbins - Number of bins to calculate to partition the data. Setting this to a smaller number will result in faster training time, while potentially sacrificing accuracy. If there are more bins than unique values in a column, all unique values will be used.
  • parallel - Should the algorithm be run in parallel?
  • allow_missing_splits - Should the algorithm allow splits that completely separate out missing and non-missing values, in the case where create_missing_branch is false. When create_missing_branch is true, setting this to true will result in the missing branch being further split.
  • monotone_constraints - Constraints that are used to enforce a specific relationship between the training features and the target variable.
  • subsample - Percent of records to randomly sample at each iteration when training a tree.
  • top_rate - Used only in goss. The retain ratio of large gradient data.
  • other_rate - Used only in goss. The retain ratio of small gradient data.
  • colsample_bytree - Specify the fraction of columns that should be sampled at each iteration, valid values are in the range (0.0,1.0].
  • seed - Integer value used to seed any randomness used in the algorithm.
  • missing - Value to consider missing.
  • create_missing_branch - Should missing be split out into its own separate branch?
  • sample_method - Specify the method by which records should be sampled when training.
  • evaluation_metric - Define the evaluation metric to record at each iteration.
  • early_stopping_rounds - Number of rounds within which the evaluation metric value must improve in order to keep training.
  • initialize_base_score - If this is specified, the base_score will be calculated using the sample_weight and y data in accordance with the requested objective_type.
  • missing_node_treatment - specify how missing nodes should be handled during training.
  • log_iterations - Setting to a value (N) other than zero will result in information being logged about every N iterations.
source

pub fn fit( &mut self, data: &Matrix<'_, f64>, y: &[f64], sample_weight: &[f64], evaluation_data: Option<Vec<EvaluationData<'_>>> ) -> Result<(), ForustError>

Fit the gradient booster on a provided dataset.

  • data - Matrix of f64 feature values to fit the booster on.
  • y - Slice of f64 target values.
  • sample_weight - Instance weights to use when training the model. To train without weights, use fit_unweighted.
source

pub fn fit_unweighted( &mut self, data: &Matrix<'_, f64>, y: &[f64], evaluation_data: Option<Vec<EvaluationData<'_>>> ) -> Result<(), ForustError>

Fit the gradient booster on a provided dataset without any weights.

  • data - Matrix of f64 feature values to fit the booster on.
  • y - Slice of f64 target values.
source

pub fn predict(&self, data: &Matrix<'_, f64>, parallel: bool) -> Vec<f64>

Generate predictions on data using the gradient booster.

  • data - Matrix of f64 feature values to generate predictions for.
source

pub fn predict_leaf_indices(&self, data: &Matrix<'_, f64>) -> Vec<usize>

Predict the leaf indices. This returns a vector of length N records * N trees.

source

pub fn predict_contributions( &self, data: &Matrix<'_, f64>, method: ContributionsMethod, parallel: bool ) -> Vec<f64>

Predict the contributions matrix for the provided dataset.

source

pub fn value_partial_dependence(&self, feature: usize, value: f64) -> f64

Given a value, return the partial dependence value of that value for that feature in the model.

  • feature - The index of the feature.
  • value - The value for which to calculate the partial dependence.
source

pub fn calculate_feature_importance( &self, method: ImportanceMethod, normalize: bool ) -> HashMap<usize, f32>

Calculate feature importance measure for the features in the model.

  • method: variable importance method to use.
  • n_features: The number of features to calculate the importance for.
source

pub fn save_booster(&self, path: &str) -> Result<(), ForustError>

Save a booster as a json object to a file.

  • path - Path to save booster.
source

pub fn json_dump(&self) -> Result<String, ForustError>

Dump a booster as a json object

source

pub fn from_json(json_str: &str) -> Result<Self, ForustError>

Load a booster from a JSON string.

  • json_str - JSON string to deserialize into a booster.
source

pub fn load_booster(path: &str) -> Result<Self, ForustError>

Load a booster from a path to a json booster object.

  • path - Path to load booster from.
source

pub fn set_objective_type(self, objective_type: ObjectiveType) -> Self

Set the objective_type on the booster.

  • objective_type - The objective type of the booster.
source

pub fn set_iterations(self, iterations: usize) -> Self

Set the iterations on the booster.

  • iterations - The number of iterations of the booster.
source

pub fn set_learning_rate(self, learning_rate: f32) -> Self

Set the learning_rate on the booster.

  • learning_rate - The learning rate of the booster.
source

pub fn set_max_depth(self, max_depth: usize) -> Self

Set the max_depth on the booster.

  • max_depth - The maximum tree depth of the booster.
source

pub fn set_max_leaves(self, max_leaves: usize) -> Self

Set the max_leaves on the booster.

  • max_leaves - The maximum number of leaves of the booster.
source

pub fn set_nbins(self, nbins: u16) -> Self

Set the number of nbins on the booster.

  • nbins - Number of bins to calculate to partition the data. Setting this to a smaller number will result in faster training time, while potentially sacrificing accuracy. If there are more bins than unique values in a column, all unique values will be used.
source

pub fn set_l1(self, l1: f32) -> Self

Set the l1 on the booster.

  • l1 - The l1 regularization term of the booster.
source

pub fn set_l2(self, l2: f32) -> Self

Set the l2 on the booster.

  • l2 - The l2 regularization term of the booster.
source

pub fn set_gamma(self, gamma: f32) -> Self

Set the gamma on the booster.

  • gamma - The gamma value of the booster.
source

pub fn set_max_delta_step(self, max_delta_step: f32) -> Self

Set the max_delta_step on the booster.

  • max_delta_step - The max_delta_step value of the booster.
source

pub fn set_min_leaf_weight(self, min_leaf_weight: f32) -> Self

Set the min_leaf_weight on the booster.

  • min_leaf_weight - The minimum sum of the hessian values allowed in the node of a tree of the booster.
source

pub fn set_base_score(self, base_score: f64) -> Self

Set the base_score on the booster.

  • base_score - The base score of the booster.
source

pub fn set_initialize_base_score(self, initialize_base_score: bool) -> Self

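Set the initialize_base_score on the booster.

  • initialize_base_score - If true, the base_score will be calculated using the sample_weight and y data in accordance with the requested objective_type.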
source

pub fn set_parallel(self, parallel: bool) -> Self

Set the parallel on the booster.

  • parallel - Set if the booster should be trained in parallel.
source

pub fn set_allow_missing_splits(self, allow_missing_splits: bool) -> Self

Set the allow_missing_splits on the booster.

  • allow_missing_splits - Set if missing splits are allowed for the booster.
source

pub fn set_monotone_constraints( self, monotone_constraints: Option<ConstraintMap> ) -> Self

Set the monotone_constraints on the booster.

  • monotone_constraints - The monotone constraints of the booster.
source

pub fn set_subsample(self, subsample: f32) -> Self

Set the subsample on the booster.

  • subsample - Percent of the data to randomly sample when training each tree.
source

pub fn set_colsample_bytree(self, colsample_bytree: f64) -> Self

Set the colsample_bytree on the booster.

  • colsample_bytree - Percent of the columns to randomly sample when training each tree.
source

pub fn set_seed(self, seed: u64) -> Self

Set the seed on the booster.

  • seed - Integer value used to seed any randomness used in the algorithm.
source

pub fn set_missing(self, missing: f64) -> Self

Set missing value of the booster

  • missing - Float value to consider as missing.
source

pub fn set_create_missing_branch(self, create_missing_branch: bool) -> Self

Set create missing value of the booster

  • create_missing_branch - Bool specifying if missing should get its own branch.
source

pub fn set_sample_method(self, sample_method: SampleMethod) -> Self

Set sample method on the booster.

  • sample_method - Sample method.
source

pub fn set_evaluation_metric(self, evaluation_metric: Option<Metric>) -> Self

Set the evaluation metric on the booster.

  • evaluation_metric - The evaluation metric to record at each iteration.
source

pub fn set_early_stopping_rounds( self, early_stopping_rounds: Option<usize> ) -> Self

Set early stopping rounds.

  • early_stopping_rounds - Early stopping rounds.
source

pub fn set_prediction_iteration( self, prediction_iteration: Option<usize> ) -> Self

Set prediction iterations.

  • prediction_iteration - Number of trees to use when predicting.
source

pub fn set_terminate_missing_features( self, terminate_missing_features: HashSet<usize> ) -> Self

Set the features whose missing nodes should always be terminated.

  • terminate_missing_features - Hashset of the feature indices for the features that should always terminate the missing node, if create_missing_branch is true.
source

pub fn insert_metadata(&mut self, key: String, value: String)

Insert metadata

  • key - String value for the metadata key.
  • value - value to assign to the metadata key.
source

pub fn get_metadata(&self, key: &String) -> Option<String>

Get Metadata

  • key - Get the associated value for the metadata key.

Trait Implementations§

source§

impl Default for GradientBooster

source§

fn default() -> Self

Returns the “default value” for a type. Read more
source§

impl<'de> Deserialize<'de> for GradientBooster

source§

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more
source§

impl Serialize for GradientBooster

source§

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer. Read more

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> IntoEither for T

source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
source§

impl<T> Pointable for T

source§

const ALIGN: usize = _

The alignment of pointer.
§

type Init = T

The type for initializers.
source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a with the given initializer. Read more
source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

source§

fn vzip(self) -> V

source§

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,