# treeboost 0.1.0
#
# High-performance Gradient Boosted Decision Tree engine for large-scale tabular data
# Documentation: https://docs.rs/treeboost
[package]
name = "treeboost"
version = "0.1.0"
authors = ["Farhan Syah"]
# crates.io category slugs shown on the crate page.
categories = ["science", "algorithms", "command-line-utilities"]
documentation = "https://docs.rs/treeboost"
edition = "2021"
# Paths left out of the published crate archive.
# NOTE(review): `benches/**` is excluded here while [[bench]] targets are
# declared further down in this manifest — confirm `cargo package` behaves as
# intended with the Cargo version used for publishing.
exclude = [
  "benchmarks/**",
  "benches/**",
  "catboost_info/**",
  "images/**",
  "results/**",
  "samples/**",
  "scripts/**",
  "target/**",
]
keywords = [
  "machine-learning",
  "gbdt",
  "gradient-boosting",
  "decision-tree",
  "xgboost",
]
license = "Apache-2.0"
readme = "README.md"
# `homepage` is intentionally not set: it pointed at the same URL as
# `repository`, and the Cargo manifest reference asks that `homepage` not be
# redundant with the `repository` or `documentation` values.
repository = "https://github.com/ml-rust/treeboost"
description = "High-performance Gradient Boosted Decision Tree engine for large-scale tabular data"

[lib]
# Emit both a dynamic system library (`cdylib`) and a normal Rust library
# (`rlib`). Presumably the `cdylib` output backs the optional Python
# bindings — confirm.
crate-type = ["cdylib", "rlib"]
path = "src/lib.rs"
name = "treeboost"

[features]
# Nothing optional is compiled in by default; every feature below is opt-in.
# The `dep:` prefix enables an optional dependency without exposing an
# implicit feature named after it.
default = []
# CUDA support: enables the optional `cudarc` dependency.
cuda = ["dep:cudarc"]
# GPU support: enables the optional `wgpu` and `pollster` dependencies.
gpu = ["dep:pollster", "dep:wgpu"]
# Memory-mapped file I/O: enables the optional `memmap2` dependency.
mmap = ["dep:memmap2"]
# Python bindings: enables the optional `pyo3` and `numpy` dependencies.
python = ["dep:numpy", "dep:pyo3"]

[[bin]]
# Executable target built from `src/main.rs`; it shares its name with the library.
path = "src/main.rs"
name = "treeboost"

[dependencies]
# Entries are kept in alphabetical order; optional ones are switched on by the
# matching entry in [features].
# Binary serialization of model files.
bincode = "1.3"
# Safe transmutation of histogram entries.
bytemuck = { version = "1.19", features = ["derive"] }
# Timestamps used when naming run directories.
chrono = "0.4"
# Command-line argument parsing.
clap = { version = "4.5", features = ["derive"] }
# CRC32 integrity checks for .trb files.
crc32fast = "1.5"
# CSV output for tuner logging.
csv = "1.3"
# CUDA driver bindings (optional, `cuda` feature). The version is pinned exactly.
cudarc = { version = "=0.18.2", optional = true, default-features = false, features = [
  "driver",
  "nvrtc",
  "dynamic-linking",
  "cuda-version-from-build-system",
] }
# Linear algebra in pure Rust, with no C/Fortran linking.
faer = "0.23"
# File locking to prevent concurrent access.
fs4 = "0.13"
# Memory-mapped file I/O (optional, `mmap` feature).
memmap2 = { version = "0.9", optional = true }
# NumPy interop for the Python bindings (optional, `python` feature).
numpy = { version = "0.23", optional = true }
# Ordered floating-point values for bin boundaries.
ordered-float = "4.5"
# Columnar data processing with lazy evaluation.
polars = { version = "0.51", features = ["parquet", "lazy", "dtype-full"] }
# Executor for blocking on async WGPU calls (optional, `gpu` feature).
pollster = { version = "0.4", optional = true }
# Python bindings (optional, `python` feature).
# NOTE(review): `extension-module` is always on whenever pyo3 is enabled; the
# PyO3 FAQ describes linker errors this can cause for `cargo test` and binary
# targets — confirm that builds with `--features python` link as expected.
pyo3 = { version = "0.23", features = ["extension-module"], optional = true }
# Random number generation for data splitting.
rand = "0.8"
# Data parallelism on a work-stealing scheduler.
rayon = "1.10"
# Zero-copy serialization for model loading.
rkyv = "0.8"
# Fast hashing for target-encoding maps.
rustc-hash = "2.1"
# Serialization framework.
serde = { version = "1.0", features = ["derive"] }
# JSON serialization for predictions output.
serde_json = "1.0"
# Streaming T-Digest quantile estimation for bin boundaries.
tdigest = "0.2"
# Error type derivation.
thiserror = "2.0"
# GPU compute via WebGPU (optional, `gpu` feature).
wgpu = { version = "27", optional = true }

[dev-dependencies]
# Only used by tests, examples and benchmarks; kept in alphabetical order.
approx = "0.5"
criterion = { version = "0.5", features = ["html_reports"] }
fastrand = "2.3"
# Competitor GBDT implementation, pulled in for benchmarking.
forust-ml = "0.4"
# Competitor GBDT implementation, pulled in for benchmarking.
gbdt = { version = "0.1", features = ["enable_training"] }
tempfile = "3.15"

# Benchmark targets. `harness = false` turns off the built-in libtest bench
# harness, so each target has to provide its own `main` (presumably through
# the `criterion` dev-dependency — confirm).
[[bench]]
name = "competitors"
harness = false

[[bench]]
name = "profile"
harness = false

[[bench]]
name = "correctness"
harness = false

[profile.release]
# Highest optimization level.
opt-level = 3
# A single codegen unit: slower to compile, but gives the optimizer the whole
# crate at once.
codegen-units = 1
# "Thin" link-time optimization.
lto = "thin"

[profile.bench]
# Same LTO and codegen-unit settings as [profile.release], stated explicitly
# for benchmark builds.
codegen-units = 1
lto = "thin"