brainwires-datasets 0.7.0

Training data pipelines for the Brainwires Agent Framework — JSONL I/O, tokenization, deduplication, format conversion
Documentation
/// Dataset statistics computation.
///
/// The statistics types and the `compute_stats` / `compute_preference_stats`
/// functions defined here are also re-exported alongside this module.
pub mod stats;
/// Dataset validation rules and reporting.
///
/// The validator, its configuration, and the issue/report types defined here
/// are also re-exported alongside this module.
pub mod validator;

/// MinHash-based and exact deduplication.
///
/// Compiled only when the `dedup` Cargo feature is enabled.
#[cfg(feature = "dedup")]
pub mod dedup;

// Flatten the `stats` API so callers can name these items without the
// `stats::` path segment.
pub use stats::{
    DatasetStats, HistogramBucket, PreferenceStats, RoleCounts, compute_preference_stats,
    compute_stats,
};
// Flatten the `validator` API in the same way.
pub use validator::{
    DataValidator, IssueSeverity, ValidationIssue, ValidationReport, ValidatorConfig,
};

// These re-exports carry the same feature gate as the `dedup` module itself;
// without the `dedup` feature neither the module nor these names exist.
#[cfg(feature = "dedup")]
pub use dedup::{Deduplicator, exact_dedup, exact_dedup_preferences};