use crate::api_standardization::{NullHandling, ResultMetadata, StandardizedConfig};
use crate::error::StatsResult;
use scirs2_core::numeric::{Float, NumCast};
use std::collections::HashMap;
use std::marker::PhantomData;
use std::sync::Arc;
use std::time::{Duration, Instant};
/// Settings that drive a [`FluentStats`] pipeline.
///
/// The `Default` implementation switches every boolean flag on except
/// `enable_async`.
#[derive(Debug, Clone)]
pub struct FluentStatsConfig {
    /// Standardized settings from `crate::api_standardization`. The
    /// `FluentStats` builder methods `parallel`, `simd`, `confidence`,
    /// `null_handling` and `memory_limit` write into this value.
    pub base_config: StandardizedConfig,
    /// Fluent-API switch.
    /// NOTE(review): no reader of this flag is visible in this module.
    pub enable_fluent_api: bool,
    /// When `true`, `FluentStats` consults and populates its result cache
    /// while executing operations.
    pub enable_result_caching: bool,
    /// Streaming switch, also settable through `FluentStats::streaming`.
    /// NOTE(review): no reader of this flag is visible in this module.
    pub enable_streaming: bool,
    /// Async switch.
    /// NOTE(review): no reader of this flag is visible in this module.
    pub enable_async: bool,
    /// Selects what `FluentStats::execute` does to the operation chain before
    /// running it.
    pub auto_optimization_level: AutoOptimizationLevel,
    /// Requested result format, also settable through `FluentStats::format`.
    /// NOTE(review): no reader of this value is visible in this module.
    pub result_format: ResultFormat,
    /// When `true`, `FluentStats::with_config` attaches a performance monitor
    /// that records every successful `execute` call.
    pub enable_performance_monitoring: bool,
    /// Memory strategy.
    /// NOTE(review): no reader of this value is visible in this module.
    pub memory_strategy: MemoryStrategy,
}
impl Default for FluentStatsConfig {
fn default() -> Self {
Self {
base_config: StandardizedConfig::default(),
enable_fluent_api: true,
enable_result_caching: true,
enable_streaming: true,
enable_async: false, auto_optimization_level: AutoOptimizationLevel::Intelligent,
result_format: ResultFormat::Comprehensive,
enable_performance_monitoring: true,
memory_strategy: MemoryStrategy::Adaptive,
}
}
}
/// How `FluentStats::execute` prepares the operation chain before running it.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AutoOptimizationLevel {
    /// The chain is executed exactly as it was built.
    None,
    /// The chain is stably sorted by `StatisticalOperation::memory_access_pattern`.
    Basic,
    /// The chain is stably sorted by `StatisticalOperation::estimated_complexity`.
    Intelligent,
    /// Routes to the operation-fusion step, which is currently a no-op.
    Aggressive,
}
/// Requested shape of the results.
///
/// NOTE(review): the value is stored in `FluentStatsConfig::result_format`, but
/// no code visible in this module branches on it, so the per-variant meaning
/// is not documented here.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ResultFormat {
    Minimal,
    Standard,
    Comprehensive,
    Custom,
}
/// Requested memory-usage strategy.
///
/// NOTE(review): the value is stored in `FluentStatsConfig::memory_strategy`,
/// but no code visible in this module branches on it, so the per-variant
/// meaning is not documented here.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum MemoryStrategy {
    Conservative,
    Balanced,
    Performance,
    Adaptive,
}
/// Entry point of the fluent statistics API.
///
/// Holds the configuration and the queue of operations that the sub-builders
/// (`descriptive`, `correlation`, `test`, `regression`) append when their
/// `and()` method is called. `execute` then runs the queue.
pub struct FluentStats<F>
where
    F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
    /// Active configuration; mutated by the builder methods.
    config: FluentStatsConfig,
    /// Operations queued for the next `execute` call.
    operation_chain: Vec<StatisticalOperation>,
    /// Results keyed by `StatisticalOperation::cache_key`.
    result_cache: Arc<std::sync::RwLock<HashMap<String, CachedResult<F>>>>,
    /// Present only when performance monitoring is enabled in the config.
    performance_monitor: Option<PerformanceMonitor>,
    /// Marker tying the builder to its float type.
    _phantom: PhantomData<F>,
}
impl<F> Default for FluentStats<F>
where
F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
fn default() -> Self {
Self::new()
}
}
impl<F> FluentStats<F>
where
F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
pub fn new() -> Self {
Self::with_config(FluentStatsConfig::default())
}
pub fn with_config(config: FluentStatsConfig) -> Self {
let performance_monitor = if config.enable_performance_monitoring {
Some(PerformanceMonitor::new())
} else {
None
};
Self {
config,
operation_chain: Vec::new(),
result_cache: Arc::new(std::sync::RwLock::new(HashMap::new())),
performance_monitor,
_phantom: PhantomData,
}
}
pub fn parallel(mut self, enable: bool) -> Self {
self.config.base_config.parallel = enable;
self
}
pub fn simd(mut self, enable: bool) -> Self {
self.config.base_config.simd = enable;
self
}
pub fn confidence(mut self, level: f64) -> Self {
self.config.base_config.confidence_level = level;
self
}
pub fn null_handling(mut self, strategy: NullHandling) -> Self {
self.config.base_config.null_handling = strategy;
self
}
pub fn memory_limit(mut self, limit: usize) -> Self {
self.config.base_config.memory_limit = Some(limit);
self
}
pub fn optimization(mut self, level: AutoOptimizationLevel) -> Self {
self.config.auto_optimization_level = level;
self
}
pub fn streaming(mut self, enable: bool) -> Self {
self.config.enable_streaming = enable;
self
}
pub fn format(mut self, format: ResultFormat) -> Self {
self.config.result_format = format;
self
}
pub fn descriptive(self) -> FluentDescriptive<F> {
FluentDescriptive::new(self)
}
pub fn correlation(self) -> FluentCorrelation<F> {
FluentCorrelation::new(self)
}
pub fn test(self) -> FluentTesting<F> {
FluentTesting::new(self)
}
pub fn regression(self) -> FluentRegression<F> {
FluentRegression::new(self)
}
pub fn execute(&mut self) -> StatsResult<ChainedResults<F>> {
let start_time = Instant::now();
let mut results = ChainedResults::new();
if self.config.auto_optimization_level != AutoOptimizationLevel::None {
self.optimize_operation_chain()?;
}
for operation in &self.operation_chain {
let result = self.execute_operation(operation)?;
results.add_result(operation.name.clone(), result);
}
if let Some(ref mut monitor) = self.performance_monitor {
monitor.record_execution(start_time.elapsed(), self.operation_chain.len());
}
Ok(results)
}
fn optimize_operation_chain(&mut self) -> StatsResult<()> {
match self.config.auto_optimization_level {
AutoOptimizationLevel::Basic => {
self.operation_chain
.sort_by_key(|op| op.memory_access_pattern());
}
AutoOptimizationLevel::Intelligent => {
self.apply_intelligent_optimization()?;
}
AutoOptimizationLevel::Aggressive => {
self.fuse_operations()?;
}
AutoOptimizationLevel::None => {}
}
Ok(())
}
fn apply_intelligent_optimization(&mut self) -> StatsResult<()> {
self.operation_chain
.sort_by_key(|op| op.estimated_complexity());
Ok(())
}
fn fuse_operations(&mut self) -> StatsResult<()> {
Ok(())
}
fn execute_operation(
&self,
operation: &StatisticalOperation,
) -> StatsResult<OperationResult<F>> {
if self.config.enable_result_caching {
let cache_key = operation.cache_key();
if let Ok(cache) = self.result_cache.read() {
if let Some(cached) = cache.get(&cache_key) {
if !cached.is_expired() {
return Ok(cached.result.clone());
}
}
}
}
let result = match &operation.operation_type {
OperationType::Mean => self.execute_mean_operation(operation),
OperationType::Variance => self.execute_variance_operation(operation),
OperationType::Correlation => self.execute_correlation_operation(operation),
OperationType::TTest => self.execute_ttest_operation(operation),
OperationType::Regression => self.execute_regression_operation(operation),
}?;
if self.config.enable_result_caching {
let cache_key = operation.cache_key();
if let Ok(mut cache) = self.result_cache.write() {
cache.insert(cache_key, CachedResult::new(result.clone()));
}
}
Ok(result)
}
fn execute_mean_operation(
&self,
_operation: &StatisticalOperation,
) -> StatsResult<OperationResult<F>> {
Ok(OperationResult {
value: Box::new(F::zero()),
metadata: ResultMetadata {
samplesize: 0,
degrees_of_freedom: None,
confidence_level: None,
method: "mean".to_string(),
computation_time_ms: 0.0,
memory_usage_bytes: None,
optimized: true,
extra: HashMap::new(),
},
operation_type: OperationType::Mean,
})
}
fn execute_variance_operation(
&self,
_operation: &StatisticalOperation,
) -> StatsResult<OperationResult<F>> {
Ok(OperationResult {
value: Box::new(F::one()),
metadata: ResultMetadata {
samplesize: 0,
degrees_of_freedom: Some(0),
confidence_level: None,
method: "variance".to_string(),
computation_time_ms: 0.0,
memory_usage_bytes: None,
optimized: true,
extra: HashMap::new(),
},
operation_type: OperationType::Variance,
})
}
fn execute_correlation_operation(
&self,
_operation: &StatisticalOperation,
) -> StatsResult<OperationResult<F>> {
Ok(OperationResult {
value: Box::new(F::zero()),
metadata: ResultMetadata {
samplesize: 0,
degrees_of_freedom: None,
confidence_level: Some(0.95),
method: "pearson_correlation".to_string(),
computation_time_ms: 0.0,
memory_usage_bytes: None,
optimized: true,
extra: HashMap::new(),
},
operation_type: OperationType::Correlation,
})
}
fn execute_ttest_operation(
&self,
_operation: &StatisticalOperation,
) -> StatsResult<OperationResult<F>> {
Ok(OperationResult {
value: Box::new(F::zero()),
metadata: ResultMetadata {
samplesize: 0,
degrees_of_freedom: Some(0),
confidence_level: Some(0.95),
method: "t_test".to_string(),
computation_time_ms: 0.0,
memory_usage_bytes: None,
optimized: true,
extra: HashMap::new(),
},
operation_type: OperationType::TTest,
})
}
fn execute_regression_operation(
&self,
_operation: &StatisticalOperation,
) -> StatsResult<OperationResult<F>> {
Ok(OperationResult {
value: Box::new(F::zero()),
metadata: ResultMetadata {
samplesize: 0,
degrees_of_freedom: Some(0),
confidence_level: Some(0.95),
method: "linear_regression".to_string(),
computation_time_ms: 0.0,
memory_usage_bytes: None,
optimized: true,
extra: HashMap::new(),
},
operation_type: OperationType::Regression,
})
}
}
/// Sub-builder that collects descriptive statistics to run.
///
/// Obtained from `FluentStats::descriptive`; `and()` hands control back to the
/// parent builder.
pub struct FluentDescriptive<F>
where
    F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
    /// Builder that receives the collected operations.
    parent: FluentStats<F>,
    /// Statistics requested so far, in call order.
    operations: Vec<DescriptiveOperation>,
}
impl<F> FluentDescriptive<F>
where
    F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
    /// Starts an empty selection attached to `parent`.
    fn new(parent: FluentStats<F>) -> Self {
        let operations = Vec::new();
        Self { parent, operations }
    }

    /// Requests the mean.
    pub fn mean(mut self) -> Self {
        self.operations.push(DescriptiveOperation::Mean);
        self
    }

    /// Requests the variance with the given `ddof`.
    pub fn variance(mut self, ddof: usize) -> Self {
        let requested = DescriptiveOperation::Variance(ddof);
        self.operations.push(requested);
        self
    }

    /// Requests the standard deviation with the given `ddof`.
    pub fn std_dev(mut self, ddof: usize) -> Self {
        let requested = DescriptiveOperation::StdDev(ddof);
        self.operations.push(requested);
        self
    }

    /// Requests the skewness.
    pub fn skewness(mut self) -> Self {
        self.operations.push(DescriptiveOperation::Skewness);
        self
    }

    /// Requests the kurtosis.
    pub fn kurtosis(mut self) -> Self {
        self.operations.push(DescriptiveOperation::Kurtosis);
        self
    }

    /// Requests mean, variance (`ddof` 1), standard deviation (`ddof` 1),
    /// skewness and kurtosis, in that order.
    pub fn all_basic(mut self) -> Self {
        self.operations.extend([
            DescriptiveOperation::Mean,
            DescriptiveOperation::Variance(1),
            DescriptiveOperation::StdDev(1),
            DescriptiveOperation::Skewness,
            DescriptiveOperation::Kurtosis,
        ]);
        self
    }

    /// Appends one single-array operation per requested statistic to the
    /// parent's chain and returns the parent.
    ///
    /// Each operation is named after the `Debug` form of the statistic, for
    /// example `Variance(1)`.
    pub fn and(self) -> FluentStats<F> {
        let Self {
            mut parent,
            operations,
        } = self;

        let queued = operations.into_iter().map(|requested| StatisticalOperation {
            name: format!("{:?}", requested),
            operation_type: OperationType::from_descriptive(requested),
            parameters: HashMap::new(),
            data_requirements: DataRequirements::single_array(),
        });
        parent.operation_chain.extend(queued);
        parent
    }
}
/// Sub-builder that configures one correlation analysis.
///
/// Obtained from `FluentStats::correlation`; `and()` hands control back to the
/// parent builder.
pub struct FluentCorrelation<F>
where
    F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
    /// Builder that receives the configured operation.
    parent: FluentStats<F>,
    /// Pairwise by default; `matrix()` switches it to `Matrix`.
    correlation_type: CorrelationType,
    /// Pearson by default.
    method: CorrelationMethod,
}
impl<F> FluentCorrelation<F>
where
    F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
    /// Starts a pairwise Pearson correlation attached to `parent`.
    fn new(parent: FluentStats<F>) -> Self {
        Self {
            parent,
            correlation_type: CorrelationType::Pairwise,
            method: CorrelationMethod::Pearson,
        }
    }

    /// Selects the correlation method.
    pub fn method(self, method: CorrelationMethod) -> Self {
        Self { method, ..self }
    }

    /// Selects Pearson correlation.
    pub fn pearson(self) -> Self {
        Self {
            method: CorrelationMethod::Pearson,
            ..self
        }
    }

    /// Selects Spearman correlation.
    pub fn spearman(self) -> Self {
        Self {
            method: CorrelationMethod::Spearman,
            ..self
        }
    }

    /// Selects Kendall correlation.
    pub fn kendall(self) -> Self {
        Self {
            method: CorrelationMethod::Kendall,
            ..self
        }
    }

    /// Requests a correlation matrix instead of a pairwise correlation.
    pub fn matrix(self) -> Self {
        Self {
            correlation_type: CorrelationType::Matrix,
            ..self
        }
    }

    /// Appends the configured correlation to the parent's chain and returns
    /// the parent.
    ///
    /// The operation is named `<method>_<type>` using the `Debug` forms, and
    /// the same two values are stored under the `"method"` and `"type"`
    /// parameter keys.
    pub fn and(self) -> FluentStats<F> {
        let Self {
            mut parent,
            correlation_type,
            method,
        } = self;

        let method_label = format!("{:?}", method);
        let type_label = format!("{:?}", correlation_type);

        let name = format!("{}_{}", method_label, type_label);
        let mut parameters = HashMap::new();
        parameters.insert("method".to_string(), method_label);
        parameters.insert("type".to_string(), type_label);

        parent.operation_chain.push(StatisticalOperation {
            name,
            operation_type: OperationType::Correlation,
            parameters,
            data_requirements: DataRequirements::multi_array(),
        });
        parent
    }
}
/// Sub-builder that configures one hypothesis test.
///
/// Obtained from `FluentStats::test`; `and()` hands control back to the parent
/// builder.
pub struct FluentTesting<F>
where
    F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
    /// Builder that receives the configured operation.
    parent: FluentStats<F>,
    /// Selected test; `TestType::TTest` until a selector method is called.
    test_type: TestType,
}
impl<F> FluentTesting<F>
where
    F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
    /// Starts a test selection attached to `parent`, defaulting to
    /// `TestType::TTest`.
    fn new(parent: FluentStats<F>) -> Self {
        Self {
            parent,
            test_type: TestType::TTest,
        }
    }

    /// Selects a one-sample t-test against the hypothesized mean `mu`.
    ///
    /// `mu` is stored as `f64`; if the conversion fails, `0.0` is used.
    pub fn t_test_one_sample(mut self, mu: F) -> Self {
        self.test_type = TestType::TTestOneSample(mu.to_f64().unwrap_or(0.0));
        self
    }

    /// Selects an independent two-sample t-test.
    pub fn t_test_independent(mut self) -> Self {
        self.test_type = TestType::TTestIndependent;
        self
    }

    /// Selects a paired t-test.
    pub fn t_test_paired(mut self) -> Self {
        self.test_type = TestType::TTestPaired;
        self
    }

    /// Appends the selected test to the parent's chain and returns the parent.
    ///
    /// The operation is named after the `Debug` form of the test type.
    /// Independent and paired t-tests compare two samples, so they declare
    /// two-array data requirements; every other test keeps the single-array
    /// requirement.
    pub fn and(mut self) -> FluentStats<F> {
        let data_requirements = match self.test_type {
            TestType::TTestIndependent | TestType::TTestPaired => DataRequirements::multi_array(),
            // NOTE(review): `ChiSquare` and `ANOVA` cannot be selected through
            // this builder; they keep the previous single-array requirement
            // until their input shape is defined.
            TestType::TTest
            | TestType::TTestOneSample(_)
            | TestType::ChiSquare
            | TestType::ANOVA => DataRequirements::single_array(),
        };

        let stat_op = StatisticalOperation {
            name: format!("{:?}", self.test_type),
            operation_type: OperationType::TTest,
            parameters: HashMap::new(),
            data_requirements,
        };
        self.parent.operation_chain.push(stat_op);
        self.parent
    }
}
/// Sub-builder that configures one regression analysis.
///
/// Obtained from `FluentStats::regression`; `and()` hands control back to the
/// parent builder.
pub struct FluentRegression<F>
where
    F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
    /// Builder that receives the configured operation.
    parent: FluentStats<F>,
    /// Selected model; linear by default.
    regression_type: RegressionType,
}
impl<F> FluentRegression<F>
where
    F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
    /// Starts a linear regression attached to `parent`.
    fn new(parent: FluentStats<F>) -> Self {
        Self {
            parent,
            regression_type: RegressionType::Linear,
        }
    }

    /// Selects linear regression.
    pub fn linear(self) -> Self {
        Self {
            regression_type: RegressionType::Linear,
            ..self
        }
    }

    /// Selects polynomial regression of the given degree.
    pub fn polynomial(self, degree: usize) -> Self {
        Self {
            regression_type: RegressionType::Polynomial(degree),
            ..self
        }
    }

    /// Selects ridge regression with parameter `alpha`.
    ///
    /// `alpha` is stored as `f64`; if the conversion fails, `0.0` is used.
    pub fn ridge(self, alpha: F) -> Self {
        let alpha = alpha.to_f64().unwrap_or(0.0);
        Self {
            regression_type: RegressionType::Ridge(alpha),
            ..self
        }
    }

    /// Appends the configured regression to the parent's chain and returns
    /// the parent. The operation is named after the `Debug` form of the
    /// regression type.
    pub fn and(self) -> FluentStats<F> {
        let Self {
            mut parent,
            regression_type,
        } = self;

        parent.operation_chain.push(StatisticalOperation {
            name: format!("{:?}", regression_type),
            operation_type: OperationType::Regression,
            parameters: HashMap::new(),
            data_requirements: DataRequirements::xy_arrays(),
        });
        parent
    }
}
/// One queued step of a `FluentStats` pipeline.
#[derive(Debug, Clone)]
pub struct StatisticalOperation {
    /// Display name; also the key under which the result is stored.
    pub name: String,
    /// Selects which executor runs the operation.
    pub operation_type: OperationType,
    /// Free-form string parameters; they take part in the cache key.
    pub parameters: HashMap<String, String>,
    /// Input data the operation declares it needs.
    pub data_requirements: DataRequirements,
}
impl StatisticalOperation {
    /// Returns the key under which this operation's result is cached.
    ///
    /// The key has the form `<name>_<operation type>_<parameters>`, where the
    /// parameters are rendered as a map sorted by key. Sorting matters: a
    /// `HashMap` iterates in arbitrary order, so formatting it directly could
    /// give two operations with equal parameters different keys and make the
    /// cache miss.
    pub fn cache_key(&self) -> String {
        let ordered_parameters: std::collections::BTreeMap<&String, &String> =
            self.parameters.iter().collect();
        format!(
            "{}_{:?}_{:?}",
            self.name, self.operation_type, ordered_parameters
        )
    }

    /// Sort key used by the `Basic` optimization level; lower values run
    /// first.
    pub fn memory_access_pattern(&self) -> u32 {
        match self.operation_type {
            OperationType::Mean => 1,
            OperationType::Variance => 2,
            OperationType::Correlation => 3,
            OperationType::TTest => 2,
            OperationType::Regression => 4,
        }
    }

    /// Sort key used by the `Intelligent` optimization level; lower values
    /// run first.
    pub fn estimated_complexity(&self) -> u32 {
        match self.operation_type {
            OperationType::Mean => 1,
            OperationType::Variance => 2,
            OperationType::Correlation => 4,
            OperationType::TTest => 3,
            OperationType::Regression => 5,
        }
    }
}
/// Executor category of a `StatisticalOperation`.
///
/// `FluentStats` dispatches on this value to pick the routine that runs the
/// operation.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum OperationType {
    /// Handled by the mean executor.
    Mean,
    /// Handled by the variance executor.
    Variance,
    /// Handled by the correlation executor.
    Correlation,
    /// Handled by the t-test executor.
    TTest,
    /// Handled by the regression executor.
    Regression,
}
impl OperationType {
    /// Maps a descriptive statistic to the executor category that runs it.
    ///
    /// Standard deviation shares the variance category; skewness and kurtosis
    /// currently share the mean category.
    fn from_descriptive(descriptive: DescriptiveOperation) -> Self {
        match descriptive {
            DescriptiveOperation::Mean
            | DescriptiveOperation::Skewness
            | DescriptiveOperation::Kurtosis => Self::Mean,
            DescriptiveOperation::Variance(_) | DescriptiveOperation::StdDev(_) => Self::Variance,
        }
    }
}
/// Input data an operation declares it needs.
#[derive(Debug, Clone)]
pub struct DataRequirements {
    /// Number of input arrays.
    pub arrays_needed: usize,
    /// Minimum size required of the input.
    pub minsize: usize,
    /// Whether the input must be numeric.
    pub requires_numeric: bool,
}
impl DataRequirements {
    /// One numeric array with a minimum size of 1.
    pub fn single_array() -> Self {
        Self {
            arrays_needed: 1,
            minsize: 1,
            requires_numeric: true,
        }
    }

    /// Two numeric arrays with a minimum size of 1.
    pub fn multi_array() -> Self {
        Self {
            arrays_needed: 2,
            ..Self::single_array()
        }
    }

    /// Two numeric arrays with a minimum size of 2.
    pub fn xy_arrays() -> Self {
        Self {
            minsize: 2,
            ..Self::multi_array()
        }
    }
}
/// Descriptive statistic selectable through `FluentDescriptive`.
#[derive(Debug, Clone, Copy)]
pub enum DescriptiveOperation {
    /// Mean.
    Mean,
    /// Variance; the payload is the `ddof` passed to `FluentDescriptive::variance`.
    Variance(usize),
    /// Standard deviation; the payload is the `ddof` passed to
    /// `FluentDescriptive::std_dev`.
    StdDev(usize),
    /// Skewness.
    Skewness,
    /// Kurtosis.
    Kurtosis,
}
/// Shape of the correlation analysis requested through `FluentCorrelation`.
#[derive(Debug, Clone, Copy)]
pub enum CorrelationType {
    /// Default selection of the correlation builder.
    Pairwise,
    /// Selected by `FluentCorrelation::matrix`.
    Matrix,
    /// NOTE(review): no builder method in this module selects this variant.
    Partial,
}
/// Correlation method selectable through `FluentCorrelation`.
#[derive(Debug, Clone, Copy)]
pub enum CorrelationMethod {
    /// Default selection of the correlation builder.
    Pearson,
    /// Selected by `FluentCorrelation::spearman`.
    Spearman,
    /// Selected by `FluentCorrelation::kendall`.
    Kendall,
}
/// Hypothesis test selectable through `FluentTesting`.
#[derive(Debug, Clone)]
pub enum TestType {
    /// Default selection of the testing builder.
    TTest,
    /// One-sample t-test; the payload is the hypothesized mean as `f64`.
    TTestOneSample(f64),
    /// Independent two-sample t-test.
    TTestIndependent,
    /// Paired t-test.
    TTestPaired,
    /// NOTE(review): no builder method in this module selects this variant.
    ChiSquare,
    /// NOTE(review): no builder method in this module selects this variant.
    ANOVA,
}
/// Regression model selectable through `FluentRegression`.
#[derive(Debug, Clone)]
pub enum RegressionType {
    /// Default selection of the regression builder.
    Linear,
    /// Polynomial regression; the payload is the degree.
    Polynomial(usize),
    /// Ridge regression; the payload is the `alpha` passed to
    /// `FluentRegression::ridge`, converted to `f64`.
    Ridge(f64),
    /// NOTE(review): no builder method in this module selects this variant;
    /// the payload is presumably the regularization strength — confirm.
    Lasso(f64),
}
/// Outcome of one executed `StatisticalOperation`.
#[derive(Debug, Clone)]
pub struct OperationResult<F> {
    /// Computed value, boxed.
    pub value: Box<F>,
    /// Standardized metadata describing how the value was produced.
    pub metadata: ResultMetadata,
    /// Executor category that produced the value.
    pub operation_type: OperationType,
}
/// Results of one `FluentStats::execute` call, addressable by operation name.
#[derive(Debug)]
pub struct ChainedResults<F> {
    /// Result stored for each operation name.
    results: HashMap<String, OperationResult<F>>,
    /// Operation names in the order they were added; drives `iter`.
    execution_order: Vec<String>,
}
impl<F> ChainedResults<F> {
fn new() -> Self {
Self {
results: HashMap::new(),
execution_order: Vec::new(),
}
}
fn add_result(&mut self, name: String, result: OperationResult<F>) {
self.execution_order.push(name.clone());
self.results.insert(name, result);
}
pub fn get(&self, name: &str) -> Option<&OperationResult<F>> {
self.results.get(name)
}
pub fn iter(&self) -> impl Iterator<Item = (&String, &OperationResult<F>)> {
self.execution_order
.iter()
.filter_map(|name| self.results.get(name).map(|result| (name, result)))
}
}
/// Cache entry: a result plus the data needed to decide when it goes stale.
#[derive(Debug, Clone)]
struct CachedResult<F> {
    /// Cached operation result.
    result: OperationResult<F>,
    /// Moment the entry was created.
    created_at: Instant,
    /// Time-to-live measured from `created_at`.
    ttl: Duration,
}
impl<F> CachedResult<F> {
    /// Wraps `result` in a cache entry stamped with the current time and a
    /// time-to-live of 300 seconds.
    fn new(result: OperationResult<F>) -> Self {
        let created_at = Instant::now();
        let ttl = Duration::from_secs(300);
        Self {
            result,
            created_at,
            ttl,
        }
    }

    /// Returns `true` once strictly more than `ttl` has elapsed since
    /// creation.
    fn is_expired(&self) -> bool {
        let age = self.created_at.elapsed();
        age > self.ttl
    }
}
/// Collects timing records for `FluentStats::execute` calls.
#[derive(Debug)]
struct PerformanceMonitor {
    /// One record per recorded execution, oldest first.
    executions: Vec<ExecutionMetrics>,
}
impl PerformanceMonitor {
    /// Creates a monitor with no recorded executions.
    fn new() -> Self {
        Self {
            executions: Vec::new(),
        }
    }

    /// Appends a record with the given duration and operation count, stamped
    /// with the current time.
    fn record_execution(&mut self, duration: Duration, operationcount: usize) {
        self.executions.push(ExecutionMetrics {
            duration,
            operation_count: operationcount,
            timestamp: Instant::now(),
        });
    }

    /// Returns the mean duration of all recorded executions.
    ///
    /// Returns `None` when nothing has been recorded, or when the number of
    /// records does not fit in a `u32` (the divisor type `Duration` supports).
    /// The previous `as u32` cast could silently truncate the count, giving a
    /// wrong average or a divide-by-zero panic.
    #[allow(dead_code)]
    fn average_execution_time(&self) -> Option<Duration> {
        let count: u32 = std::convert::TryFrom::try_from(self.executions.len()).ok()?;
        let total: Duration = self.executions.iter().map(|e| e.duration).sum();
        // `checked_div` yields `None` for a zero count, i.e. the empty case.
        total.checked_div(count)
    }
}
/// Timing record of a single `execute` call.
#[derive(Debug)]
struct ExecutionMetrics {
    /// Wall-clock time the execution took.
    duration: Duration,
    /// Number of operations in the chain at that time.
    #[allow(dead_code)]
    operation_count: usize,
    /// Moment the record was created.
    #[allow(dead_code)]
    timestamp: Instant,
}
/// Starts a fluent pipeline with the default configuration.
#[allow(dead_code)]
pub fn stats<F>() -> FluentStats<F>
where
    F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
    FluentStats::<F>::new()
}
/// Starts a fluent pipeline with the given configuration.
#[allow(dead_code)]
pub fn stats_with<F>(config: FluentStatsConfig) -> FluentStats<F>
where
    F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
    FluentStats::<F>::with_config(config)
}
/// Shortcut for a default pipeline already switched to the
/// descriptive-statistics sub-builder.
#[allow(dead_code)]
pub fn quick_descriptive<F>() -> FluentDescriptive<F>
where
    F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
    let pipeline = FluentStats::<F>::new();
    pipeline.descriptive()
}
/// Shortcut for a default pipeline already switched to the correlation
/// sub-builder.
#[allow(dead_code)]
pub fn quick_correlation<F>() -> FluentCorrelation<F>
where
    F: Float + NumCast + Send + Sync + 'static + std::fmt::Display,
{
    let pipeline = FluentStats::<F>::new();
    pipeline.correlation()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A default pipeline starts empty and, per the default config, has a
    /// performance monitor attached.
    #[test]
    fn test_fluent_stats_creation() {
        let pipeline: FluentStats<f64> = stats();
        assert!(pipeline.operation_chain.is_empty());
        assert!(pipeline.performance_monitor.is_some());
        assert!(pipeline.config.enable_result_caching);
    }

    /// `stats_with` keeps the supplied configuration and only attaches a
    /// monitor when monitoring is enabled.
    #[test]
    fn test_fluent_configuration() {
        let config = FluentStatsConfig {
            enable_fluent_api: true,
            auto_optimization_level: AutoOptimizationLevel::Intelligent,
            enable_performance_monitoring: false,
            ..Default::default()
        };
        let pipeline: FluentStats<f64> = stats_with(config);
        assert!(pipeline.config.enable_fluent_api);
        assert_eq!(
            pipeline.config.auto_optimization_level,
            AutoOptimizationLevel::Intelligent
        );
        assert!(pipeline.performance_monitor.is_none());
    }

    /// Every builder method writes the value it was given.
    #[test]
    fn test_method_chaining() {
        let chain: FluentStats<f64> = stats()
            .parallel(true)
            .simd(true)
            .confidence(0.99)
            .optimization(AutoOptimizationLevel::Aggressive);
        assert!(chain.config.base_config.parallel);
        assert!(chain.config.base_config.simd);
        assert!((chain.config.base_config.confidence_level - 0.99).abs() < f64::EPSILON);
        assert_eq!(
            chain.config.auto_optimization_level,
            AutoOptimizationLevel::Aggressive
        );
    }

    /// Requested statistics are queued on `and()` and each one produces a
    /// result when the pipeline is executed.
    #[test]
    fn test_descriptive_operations() {
        let desc = quick_descriptive::<f64>()
            .mean()
            .variance(1)
            .std_dev(1)
            .skewness()
            .kurtosis();
        assert_eq!(desc.operations.len(), 5);

        let mut pipeline = desc.and();
        assert_eq!(pipeline.operation_chain.len(), 5);

        let outcome = pipeline.execute();
        assert!(outcome.is_ok());
        if let Ok(results) = outcome {
            assert_eq!(results.iter().count(), 5);
            assert!(results.get("Mean").is_some());
            assert!(results.get("Variance(1)").is_some());
            assert!(results.get("StdDev(1)").is_some());
            assert!(results.get("Skewness").is_some());
            assert!(results.get("Kurtosis").is_some());
        }
    }
}