# aprender 0.27.2
# Next-generation machine learning library in pure Rust

# Workspace root. "." is the root package declared by [package] in this file;
# the other members live under crates/. The fuzz/ directory is kept out of the
# workspace.
[workspace]
resolver = "2"
members = [
    ".",
    "crates/aprender-shell",
    "crates/aprender-tsp",
    "crates/aprender-monte-carlo",
    "crates/apr-cli",
]
exclude = ["fuzz"]

# Metadata a workspace member can inherit with `<key>.workspace = true`.
[workspace.package]
edition = "2021"
license = "MIT"
repository = "https://github.com/paiml/aprender"

# Requirements a workspace member can inherit with
# `<name> = { workspace = true }`.
[workspace.dependencies]
proptest = "1.6"
rand = { version = "0.9", features = ["small_rng"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"

# rustc lint levels shared by the workspace. A crate opts in with
# `[lints] workspace = true`.
[workspace.lints.rust]
# Safety
# Note: Using "deny" (not "forbid") to allow documented unsafe in mmap module.
# See bundle-mmap-spec.md Section 4 for safety justification.
unsafe_code = "deny"
# Declares `kani` and `coverage_nightly` as expected cfg names so that code
# gated on them does not trip the unexpected_cfgs lint.
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)', 'cfg(coverage_nightly)'] }
unsafe_op_in_unsafe_fn = "warn"

# Code Quality
unreachable_pub = "warn"
missing_debug_implementations = "warn"
missing_docs = "allow"  # We have doc coverage checks separately

# Best Practices
# rust_2018_idioms is a lint group; priority -1 makes the individual levels set
# elsewhere in this table win over the group's level.
rust_2018_idioms = { level = "warn", priority = -1 }  # Lower priority to avoid conflicts
trivial_casts = "warn"
trivial_numeric_casts = "warn"
unused_import_braces = "warn"
unused_lifetimes = "warn"
unused_qualifications = "allow"  # Explicit paths sometimes preferred for clarity

# Clippy lint levels shared by the workspace. A crate opts in with
# `[lints] workspace = true`. The two groups below are enabled at priority -1 so
# that every per-lint level further down overrides them.
[workspace.lints.clippy]
# Base level
all = { level = "warn", priority = -1 }
pedantic = { level = "warn", priority = -1 }

# Correctness (high priority)
# NOTE(review): only checked_conversions is enabled in this group; the two doc
# lints below are switched off.
checked_conversions = "warn"
missing_errors_doc = "allow"  # We have comprehensive error docs
missing_panics_doc = "allow"  # We document via expect() messages

# Performance
inefficient_to_string = "allow"  # Style preference - to_string() often clearer
explicit_iter_loop = "warn"
manual_ok_or = "warn"

# Style & Clarity
explicit_deref_methods = "warn"
implicit_clone = "warn"
inconsistent_struct_constructor = "warn"
redundant_closure_for_method_calls = "allow"  # Common pattern in iterators
unnested_or_patterns = "warn"
used_underscore_binding = "warn"

# Allow pedantic lints that conflict with mathematical notation or ML patterns
# These levels apply to all code in every crate that inherits this table, not
# only to tests, even where the rationale on a line mentions test data.
# NOTE(review): approx_constant, erasing_op and overly_complex_bool_expr are
# believed to be deny-by-default correctness lints rather than pedantic ones;
# consider scoping those allows to test modules instead.
many_single_char_names = "allow"
cast_precision_loss = "allow"
cast_possible_truncation = "allow"  # Common in ML with dimension conversions
cast_possible_wrap = "allow"  # Common in ML algorithms
cast_sign_loss = "allow"  # Common in ML with usize/isize conversions
similar_names = "allow"
doc_markdown = "allow"
missing_const_for_fn = "allow"
module_name_repetitions = "allow"
must_use_candidate = "allow"
return_self_not_must_use = "allow"
float_cmp = "allow"  # ML algorithms often compare floats
unreadable_literal = "allow"  # Test data often has long numeric literals
items_after_statements = "allow"  # ML algorithms often need mid-function declarations
large_stack_arrays = "allow"  # ML tests often need large data arrays
too_many_arguments = "allow"  # ML training functions often need many parameters
too_many_lines = "allow"  # ML algorithms can be long
needless_range_loop = "allow"  # Explicit indexing often clearer in ML code
assigning_clones = "allow"  # Common pattern in state reset
missing_fields_in_debug = "allow"  # Some fields intentionally omitted
derivable_impls = "allow"  # Sometimes explicit impls are clearer
uninlined_format_args = "allow"  # Format string style preference
type_complexity = "allow"  # Complex types common in ML
cloned_instead_of_copied = "allow"  # Style preference
unused_self = "allow"  # Method stubs common during development
useless_vec = "allow"  # Test data often uses vec![] for clarity
manual_is_multiple_of = "allow"  # x % n == 0 pattern is clear and idiomatic in ML code
manual_div_ceil = "allow"  # (x + n - 1) / n pattern is well-understood
manual_midpoint = "allow"  # (a + b) / 2.0 is standard mathematical notation in ML
map_unwrap_or = "allow"  # .map().unwrap_or() pattern common in tests
manual_range_contains = "allow"  # Explicit comparisons often clearer
empty_line_after_doc_comments = "allow"  # provable-contracts-macros proc macro generates these
empty_line_after_outer_attr = "allow"  # provable-contracts-macros proc macro generates these
approx_constant = "allow"  # Test data uses explicit values
match_wildcard_for_single_variants = "allow"  # Future-proofs against enum changes
single_char_pattern = "allow"  # Explicit string patterns preferred
field_reassign_with_default = "allow"  # Common in test configuration
assertions_on_constants = "allow"  # Used for compile-time checks
identity_op = "allow"  # 1 * 1 * 4 shows tensor dimensions (batch * seq * features)
unnecessary_literal_unwrap = "allow"  # Test code with known Ok values
default_trait_access = "allow"  # Default::default() vs Type::default() style preference
len_zero = "allow"  # Explicit .len() > 0 sometimes clearer in ML contexts
cast_lossless = "allow"  # Explicit casts preferred in ML code
cloned_ref_to_slice_refs = "allow"  # Style preference
clone_on_copy = "allow"  # Explicit clones for clarity
default_constructed_unit_structs = "allow"  # Style preference
erasing_op = "allow"  # x * 0 patterns in test data
excessive_precision = "allow"  # ML test data needs precise values
format_push_string = "allow"  # String building style preference
if_not_else = "allow"  # Style preference
manual_contains = "allow"  # Explicit iteration sometimes clearer
match_same_arms = "allow"  # Explicit matching for future-proofing
needless_borrows_for_generic_args = "allow"  # Style preference
needless_raw_string_hashes = "allow"  # Style preference
no_effect_underscore_binding = "allow"  # Used for documentation
overly_complex_bool_expr = "allow"  # Explicit logic sometimes clearer
stable_sort_primitive = "allow"  # Explicit sort stability
unnecessary_literal_bound = "allow"  # Style preference
unnecessary_map_or = "allow"  # Style preference
vec_init_then_push = "allow"  # Explicit initialization for clarity
nonminimal_bool = "allow"  # Complex booleans in mutation testing tests
trivially_copy_pass_by_ref = "allow"  # API consistency with &T for traits
bool_to_int_with_if = "allow"  # Explicit conversion often clearer
manual_let_else = "allow"  # Early returns common pattern
needless_pass_by_value = "allow"  # API consistency
ptr_arg = "allow"  # &PathBuf/&String in APIs for consistency
single_match_else = "allow"  # Explicit match often clearer than if let
const_is_empty = "allow"  # Compile-time empty checks for arrays
unnecessary_wraps = "allow"  # Result/Option wrappers for API consistency
ignore_without_reason = "allow"  # #[ignore] tests don't always need reasons
index_refutable_slice = "allow"  # Explicit indexing preferred in ML code
redundant_guards = "allow"  # Explicit guards for clarity in pattern matching
print_literal = "allow"  # Explicit format strings for documentation
unnecessary_debug_formatting = "allow"  # Debug formatting in tests
iter_cloned_collect = "allow"  # .iter().cloned().collect() pattern
semicolon_if_nothing_returned = "allow"  # Style preference

# Root crate of the workspace.
[package]
name = "aprender"
version = "0.27.2"
# edition, license and repository are inherited from [workspace.package] so the
# root crate cannot drift from the values shared with the other members.
edition.workspace = true
rust-version = "1.89"
authors = ["Noah Gift <noah@paiml.com>"]
license.workspace = true
description = "Next-generation machine learning library in pure Rust"
repository.workspace = true
documentation = "https://docs.rs/aprender"
readme = "README.md"
keywords = ["machine-learning", "classification", "clustering", "statistics", "graph-algorithms"]
categories = ["science", "algorithms"]
# Paths left out of the published package.
exclude = [
    # Build/IDE artifacts
    "target/",
    "*.profraw",
    "*.profdata",
    ".vscode/",
    ".idea/",
    "proptest-regressions/",
    # Dev tool artifacts (CB-510 class: recursive patterns required)
    ".pmat/",
    ".pmat-metrics/",
    ".pmat-metrics.toml",
    "*.bak",
    # CI/CD and dev infrastructure (not needed by library consumers)
    ".github/",
    ".githooks/",
    ".bashrsignore",
    "Makefile",
    "scripts/",
    # Documentation (published on GitHub Pages, not crates.io)
    "docs/",
    "book/",
    # Test data and traces
    "golden_traces/",
    "tokenizer.json",
    "defect-report-*.json",
    "trace_*.json",
    "fuzz/",
    # Model files (root-anchored: /models/ not models/ per CB-510)
    "/models/",
    # Contracts not consumed by build.rs (model-families/ IS needed)
    "contracts/chat-template-semantics-v1.yaml",
    "contracts/classification-finetune-v1.yaml",
    "contracts/kernel-fusion-v1.yaml",
    "contracts/layer-parity-v1.yaml",
    "contracts/model-metadata-bounds-v1.yaml",
    "contracts/quantized-dot-product-v1.yaml",
    "contracts/special-tokens-registry-v1.yaml",
    "contracts/tensor-layout-v1.yaml",
    "contracts/tokenizer-vocab-v1.yaml",
    "contracts/publish-safety-v1.yaml",
]

# Apply the [workspace.lints.rust] and [workspace.lints.clippy] levels defined
# earlier in this file to the root crate.
[lints]
workspace = true

# Runtime dependencies of the root crate. Entries marked `optional = true` are
# only built when a feature in [features] names them.
[dependencies]
# Serialization
# serde and serde_json inherit their requirement (and serde's "derive" feature)
# from [workspace.dependencies] so there is a single place to bump them.
serde = { workspace = true }
serde_json = { workspace = true }  # SafeTensors JSON metadata
bincode = "1.3"
rmp-serde = "1.3"  # MessagePack for .apr metadata (spec §2)

# Random number generation for model_selection
# rand inherits its requirement and the "small_rng" feature from
# [workspace.dependencies].
rand = { workspace = true }
rand_chacha = "0.9"  # ChaCha20 PRNG for Monte Carlo simulations

# Audio processing
rustfft = { version = "6.2", optional = true }  # FFT for mel spectrogram computation
thiserror = { version = "2.0", optional = true }  # Error handling for audio module

# Native audio capture (Linux ALSA)
alsa = { version = "0.9", optional = true }  # ALSA bindings for Linux audio capture

# Parallelization for graph algorithms (optional for WASM compatibility)
rayon = { version = "1.10", optional = true }

# Shared formatting and system utilities (Batuta stack)
batuta-common = "0.1"

# Core compute primitives - SIMD-accelerated tensor operations
# The wgpu adapter limits fix (buffer >256MB for 7B+ models) was noted against
# v0.14.5; the requirement below has since moved on to 0.16.x.
trueno = "0.16.0"

# K-quantization formats (Q4_K, Q5_K, Q6_K) - Toyota Way: ONE source of truth
# Resolved from the registry by version.
# NOTE(review): an earlier comment here called this a path dependency "until
# published to crates.io"; that no longer matches the entry below.
trueno-quant = "0.1"

# RAG pipeline for document-based ML (optional, GH-125)
trueno-rag = { version = "0.2", optional = true }

# Compression for .apr format (optional, spec §3.3)
lz4_flex = { version = "0.11", optional = true }
zstd = { version = "0.13", optional = true }

# Half-precision floats for quantization (spec §6.2)
half = { version = "2.4", optional = true, default-features = false, features = ["std"] }

# Digital signatures for .apr format (optional, spec §4.2)
ed25519-dalek = { version = "2.1", optional = true, default-features = false, features = ["std", "zeroize", "rand_core"] }

# Encryption for .apr format (optional, spec §4.1)
aes-gcm = { version = "0.10", optional = true }
argon2 = { version = "0.5", optional = true, default-features = false, features = ["std"] }
x25519-dalek = { version = "2.0", optional = true, default-features = false, features = ["static_secrets"] }
hkdf = { version = "0.12", optional = true }
# Also enabled by the hf-hub-integration feature, not only by format-encryption.
sha2 = { version = "0.10", optional = true }  # For HKDF-SHA256

# Data loading
alimentar = { version = "0.2.2", optional = true }

# entrenar: removed as runtime dep — explainable types now live in aprender (GH-305)
# entrenar remains as dev-dependency for InferenceMonitor integration tests

# Syscall/GPU profiling for showcase benchmarks (dev-only to avoid circular dep)
# renacer depends on aprender, so it cannot be a runtime dep
# renacer = { version = "0.10", optional = true }

# SIMD-accelerated compression for KV cache (optional)
trueno-zram-core = { version = "0.3.0", optional = true }

# Hugging Face Hub integration (optional, spec §11.8, hf-hub-upload-spec.md)
hf-hub = { version = "0.4", optional = true, default-features = false, features = ["ureq"] }
dirs = { version = "6.0", optional = true }
ureq = { version = "2.12", optional = true, features = ["json"] }  # Direct HTTP for HF Hub upload (APR-PUB-001)

# SafeTensors format parsing (optional, for HF model comparison)
safetensors = { version = "0.4", optional = true }

# WASM bindings for noise generator (optional, spec: noise-generator-apr-wasm-spec.md)
wasm-bindgen = { version = "0.2", optional = true }
js-sys = { version = "0.3", optional = true }

# Template engine; always built (not optional) and unrelated to the WASM
# bindings above.
# NOTE(review): presumably used for chat-template rendering (see the
# chat_template example) — confirm.
minijinja = { version = "2.14.0", features = ["loader", "serde"] }

# UCBD: compile-time contract enforcement via #[contract] proc macro
provable-contracts-macros = "0.1"

# Toyota Way: ONE source of truth for quantization (Step E)
# NOTE: Currently blocked by cyclic dependency (realizar optionally depends on aprender).
# Resolution requires creating a separate quantization crate.
# NOTE(review): trueno-quant above may already be that separate crate — confirm
# whether this block is still current.
# See docs/specifications/qwen2.5-coder-showcase-demo.md Section E.7 for roadmap.
# realizar = { version = "0.6", default-features = false, optional = true }  # BLOCKED

# Dependencies of build.rs.
[build-dependencies]
# Inherits the requirement and the "derive" feature from
# [workspace.dependencies] instead of repeating a separately-written "1".
serde = { workspace = true }
# NOTE(review): presumably parses the YAML files under contracts/ that the
# [package] exclude list says build.rs consumes — confirm.
serde_yaml_ng = "0.10"

# Dependencies used only by tests, benches and examples.
[dev-dependencies]
# Inherits the requirement from [workspace.dependencies].
proptest = { workspace = true }
criterion = "0.5"
# Dev-only: renacer itself depends on aprender, so it cannot be a runtime
# dependency (see the renacer note in [dependencies]).
renacer = "0.10"
tempfile = "3.14"  # For format module tests
jugar-probar = "0.5"  # TUI/GUI testing framework with coverage tracking (spec §8)
ctrlc = "3.4"  # Signal handling for SIGINT/SIGTERM (PMAT-098-PF: zombie process mitigation)
provable-contracts = "0.1"  # Contract enforcement (dev-only)
entrenar = "0.7"  # Integration tests for InferenceMonitor (GH-305: was runtime dep, now dev-only)

# Cargo features of the root crate. An entry that names an optional dependency
# (e.g. "rayon") enables that dependency; an entry of the form "crate/feature"
# enables a feature of a dependency; an empty list enables no dependencies.
[features]
default = ["parallel"]
parallel = ["rayon"]  # Enable parallel graph algorithms (disable for WASM)
datasets = ["alimentar"]  # Enable data loading from alimentar
format-compression = ["lz4_flex", "zstd"]  # Enable LZ4/ZSTD compression for .apr format (spec §3.3, GH-146)
format-signing = ["ed25519-dalek"]  # Enable Ed25519 signatures for .apr format (spec §4.2)
format-encryption = ["aes-gcm", "argon2", "x25519-dalek", "hkdf", "sha2"]  # Enable encryption for .apr format (spec §4.1)
format-quantize = ["half"]  # Enable quantization for .apr format (spec §6.2)
format-homomorphic = []  # Enable homomorphic encryption for .apr format (spec: homomorphic-encryption-spec.md)
# Note: mmap is automatic on native platforms, no feature needed (spec: bundle-mmap-spec.md)
hf-hub-integration = ["hf-hub", "dirs", "ureq", "sha2"]  # Enable Hugging Face Hub integration (GH-100, APR-PUB-001)
audio = ["rustfft", "thiserror"]  # Enable audio processing (mel spectrogram, resampling)
audio-capture = ["audio"]  # Enable audio capture base functionality
# Of the platform capture features below, only audio-alsa adds a dependency;
# the others currently enable nothing beyond audio-capture.
audio-alsa = ["audio-capture", "alsa"]  # Enable ALSA audio capture (Linux only)
audio-coreaudio = ["audio-capture"]  # Enable CoreAudio capture (macOS only)
audio-wasapi = ["audio-capture"]  # Enable WASAPI capture (Windows only)
audio-webaudio = ["audio-capture"]  # Enable WebAudio capture (WASM only)
audio-playback = ["audio"]  # Enable audio playback
audio-codec = ["audio"]  # Enable audio codec decoding (WAV, MP3, AAC, FLAC, Opus)
audio-noise = ["audio"]  # Enable ML-based noise generation (GH-144)
audio-noise-wasm = ["audio-noise", "wasm-bindgen", "js-sys"]  # Enable WASM bindings for noise generator
safetensors-compare = ["safetensors", "hf-hub-integration", "half"]  # Enable SafeTensors comparison (GH-121)
rag = ["trueno-rag"]  # Enable RAG pipeline for document-based ML (GH-125)
# inference-monitoring feature removed — explainable types now unconditional (GH-305)
gpu = ["trueno/gpu"]  # Enable GPU acceleration via trueno wgpu backend
model-tests = []  # Enable heavy model/inference tests (requires models/ dir, ollama, GPU)
cuda = ["trueno/cuda-monitor"]  # Enable CUDA monitoring via trueno-gpu (NVIDIA GPUs)
# NOTE(review): cpu-only enables nothing here and has no description —
# presumably a cfg gate in the sources; confirm and document.
cpu-only = []
# Kept as an empty feature: the renacer dependency it would need is commented
# out in [dependencies].
showcase-profile = []  # Renacer profiling (disabled: renacer→aprender circular dep)
showcase-zram = ["trueno-zram-core"]  # Enable trueno-zram KV cache compression for showcase benchmarks (PAR-040)
# Chaos engineering features (from renacer)
# chaos-full is the union of the network and byzantine sets, both of which
# build on chaos-basic.
chaos-basic = []
chaos-network = ["chaos-basic"]
chaos-byzantine = ["chaos-basic"]
chaos-full = ["chaos-network", "chaos-byzantine"]

# WASM support: enable getrandom's "js" feature for browser environments
# NOTE(review): this entry targets getrandom 0.2. rand 0.9 (see [dependencies])
# is believed to pull in getrandom 0.3, whose browser backend is selected
# differently (a "wasm_js" feature plus a `getrandom_backend` cfg) — confirm
# that wasm32 builds still resolve a randomness backend.
[target.'cfg(target_arch = "wasm32")'.dependencies]
getrandom = { version = "0.2", features = ["js"] }

# Memory-mapped I/O for native platforms (spec: bundle-mmap-spec.md)
# Built on every target except wasm32. The mmap module is also why
# unsafe_code is "deny" rather than "forbid" in [workspace.lints.rust].
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
memmap2 = "0.9"

# Integration test target "book". Its entry point sits in a subdirectory
# (tests/book/mod.rs), so the path is given explicitly.
[[test]]
name = "book"
path = "tests/book/mod.rs"

# Benchmark targets. `harness = false` turns off the built-in bench harness so
# each bench supplies its own entry point.
# NOTE(review): presumably these use criterion (listed in [dev-dependencies]) —
# confirm.
[[bench]]
name = "linear_regression"
harness = false

[[bench]]
name = "kmeans"
harness = false

[[bench]]
name = "dataframe"
harness = false

[[bench]]
name = "graph"
harness = false

[[bench]]
name = "recommend"
harness = false

[[bench]]
name = "citl"
harness = false

# Only built when the format-quantize feature is enabled.
[[bench]]
name = "ollama_parity"
harness = false
required-features = ["format-quantize"]

# Explicitly declared examples.
# Only built when the format-encryption feature is enabled.
[[example]]
name = "shell_encryption_demo"
required-features = ["format-encryption"]

# The three entries below set nothing besides the name.
# NOTE(review): if these files live in examples/ they would presumably be
# auto-discovered anyway — confirm whether the explicit entries are needed.
[[example]]
name = "chat_template"

[[example]]
name = "text_preprocessing"

[[example]]
name = "time_series_forecasting"

# Release builds: whole-program optimisation with a single codegen unit, while
# keeping debug info in the binary (strip = "none") for profiling.
[profile.release]
codegen-units = 1
debug = true  # Enable debug info for flamegraph/profiling
lto = true
panic = "abort"
strip = "none"

# Dev builds: no LTO, and abort on panic like the release profile.
# NOTE(review): Cargo is documented to ignore the `panic` setting for tests and
# benchmarks, so those presumably still unwind — confirm if that matters.
[profile.dev]
panic = "abort"
lto = false

# docs.rs build settings: document with every feature in [features] enabled,
# for a single Linux target.
# NOTE(review): all-features includes platform-specific features such as
# audio-alsa — confirm the docs.rs build has what they need.
[package.metadata.docs.rs]
targets = ["x86_64-unknown-linux-gnu"]
all-features = true
rustdoc-args = ["--generate-link-to-definition"]

# Release automation settings. `{{version}}` and `{{date}}` are placeholders
# filled in by the release tool.
[package.metadata.release]
shared-version = true
publish = true
push = true
sign-commit = false
sign-tag = false
tag-name = "v{{version}}"
pre-release-commit-message = "release: aprender v{{version}}"

# Rewrites the "## [Unreleased]" heading in the changelog to the released
# version and date. `search` is a literal string, so the regex backslashes need
# no doubling.
[[package.metadata.release.pre-release-replacements]]
file = "CHANGELOG.md"
search = '## \[Unreleased\]'
replace = "## [{{version}}] - {{date}}"

# PMAT-262: Self-patch so transitive deps (realizar, entrenar) use the local
# workspace aprender instead of a stale crates.io version. This prevents type
# mismatches when building apr-cli from the workspace.
# GH-344: Sibling patches (realizar, trueno, etc.) moved to .cargo/config.toml
# so that `git clone && cargo check` works without sibling repos.
# See .cargo/config.toml.dev-overrides for full-stack development setup.
# The only patch kept here points the crates.io `aprender` at this directory.
[patch.crates-io]
aprender = { path = "." }