//! Phase 2.6: synthetic data generator for training.
//!
//! Standalone, pure-CPU module used to drive the `Model` / `MoEModel`
//! layers in `src/model/` and `src/moe_model/` when the real
//! `tokitai-search` SQLite ledgers are not available (e.g. unit tests
//! and dev/CI environments). Every generator is deterministic given
//! the seed, so the same `(n_samples, in_dim, out_dim, seed)` tuple
//! always yields the same dataset.
//!
//! Public surface (re-exported for convenience):
//! - [`regression::make_regression_dataset`] — closed-form-friendly
//!   linear regression: `y = X w_star + epsilon`.
//! - [`decision_outcome::make_quality_decision_dataset`] — mimics the
//!   real 96-dim input / 20-dim output schema from
//!   `crates/training/` in tokitai-search, with a 4-expert
//!   non-linear generation process.
//! - [`stream::SyntheticSampleStream`] — feeds any
//!   `Vec<QualitySample>` in mini-batches.
pub use ;
pub use ;
pub use SyntheticSampleStream;
/// Number of components in each synthetic quality-decision input vector.
///
/// Fixed at 96, matching the "96-dim input" schema named in the module
/// docs. Documented as mirroring
/// `crates/training::CATEGORICAL_DIMS + NUMERICAL_DIMS` in tokitai-search.
///
/// NOTE(review): the upstream constants live outside this file, so the
/// equality is not checked here — confirm and update by hand if the
/// tokitai-search schema changes.
pub const QUALITY_INPUT_DIM: usize = 96;
/// Number of components in each synthetic quality-decision output vector.
///
/// Fixed at 20, matching the "20-dim output" schema named in the module
/// docs. Documented as mirroring
/// `crates/training::OUTCOME_KIND_DIMS + AUX_METRIC_DIMS`.
///
/// NOTE(review): the upstream constants live outside this file, so the
/// equality is not checked here — confirm and update by hand if the
/// upstream schema changes.
pub const QUALITY_OUTPUT_DIM: usize = 20;