// tenflowers_dataset/visualization/types.rs

//! Data structures for dataset visualization.
//!
//! This module contains the data types used to represent
//! visualization information about datasets.
5
6use std::collections::HashMap;
7
/// Preview of a dataset: overall size plus shape information for the samples
/// selected for display.
///
/// Derives `PartialEq`/`Eq` so previews can be compared directly (for example
/// with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SamplePreview {
    /// Total number of samples (presumably in the underlying dataset; confirm
    /// against the code that builds the preview).
    pub total_samples: usize,
    /// Number of samples included in the preview. Expected to match
    /// `samples.len()`, though nothing in this type enforces it.
    pub samples_shown: usize,
    /// Shape information for each previewed sample.
    pub samples: Vec<SampleInfo>,
}

/// Shape information about a single sample.
///
/// Derives `PartialEq`/`Eq` so values can be compared directly and so that
/// [`SamplePreview`] can derive the same traits.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SampleInfo {
    /// Index of the sample (presumably its position in the dataset).
    pub index: usize,
    /// Shape of the sample's feature data, one entry per axis.
    pub feature_shape: Vec<usize>,
    /// Shape of the sample's label data, one entry per axis.
    pub label_shape: Vec<usize>,
}
23
/// Distribution information for features and labels.
///
/// Derives `PartialEq` (when `T: PartialEq`) so results can be compared
/// directly, e.g. in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct DistributionInfo<T> {
    /// Number of samples that contributed to the statistics.
    pub samples_analyzed: usize,
    /// Statistics for the features, one entry per analyzed dimension.
    pub feature_stats: Vec<FeatureStats<T>>,
    /// Statistics for the labels, using the same record type as the features.
    pub label_stats: Vec<FeatureStats<T>>,
}

/// Statistics for a single feature dimension.
///
/// Derives `PartialEq` (when `T: PartialEq`) so values can be compared
/// directly and so that [`DistributionInfo`] can derive it as well.
#[derive(Debug, Clone, PartialEq)]
pub struct FeatureStats<T> {
    /// Index of the dimension these statistics describe.
    pub dimension: usize,
    /// Mean value over the analyzed samples.
    pub mean: T,
    /// Standard deviation over the analyzed samples (whether population or
    /// sample deviation is decided by the producer, not by this type).
    pub std_dev: T,
    /// Smallest observed value.
    pub min: T,
    /// Largest observed value.
    pub max: T,
}
41
/// Class distribution information.
///
/// Derives `PartialEq`/`Eq` so distributions can be compared directly; map
/// equality does not depend on insertion order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClassDistribution {
    /// Total number of samples. Expected to equal the sum of `class_counts`
    /// values, though this type does not enforce it.
    pub total_samples: usize,
    /// Number of samples per class, keyed by a string class identifier.
    pub class_counts: HashMap<String, usize>,
}
48
/// Histogram information for a feature.
///
/// Derives `PartialEq` (when `T: PartialEq`) so histograms can be compared
/// directly, e.g. in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct FeatureHistogram<T> {
    /// Index of the feature this histogram describes.
    pub feature_index: usize,
    /// Lower end of the histogram's value range.
    pub min_value: T,
    /// Upper end of the histogram's value range.
    pub max_value: T,
    /// Width of a single bin (presumably all bins are equally wide; confirm
    /// against the code that builds the histogram).
    pub bin_width: T,
    /// Number of values counted in each bin; the vector length is the number
    /// of bins.
    pub bin_counts: Vec<usize>,
}
58
59/// Analysis of augmentation effects on dataset samples
60#[derive(Debug, Clone)]
61pub struct AugmentationEffects<T> {
62    pub samples_analyzed: usize,
63    pub transform_success_rate: f64,
64    pub feature_changes: FeatureChangeAnalysis<T>,
65    pub distribution_changes: DistributionChangeAnalysis<T>,
66    pub sample_pairs: Vec<BeforeAfterPair<T>>,
67}
68
69/// Before/after pair for transformation analysis
70#[derive(Debug, Clone)]
71pub struct BeforeAfterPair<T> {
72    pub index: usize,
73    pub original: (tenflowers_core::Tensor<T>, tenflowers_core::Tensor<T>),
74    pub transformed: (tenflowers_core::Tensor<T>, tenflowers_core::Tensor<T>),
75}
76
/// Analysis of feature changes from transformations.
///
/// Derives `PartialEq` (when `T: PartialEq`) so results can be compared
/// directly, e.g. in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct FeatureChangeAnalysis<T> {
    /// Number of features covered by the analysis.
    pub feature_count: usize,
    /// Average change observed (how change is measured is defined by the
    /// producer, not by this type).
    pub average_change: T,
    /// Largest change observed.
    pub max_change: T,
    /// Smallest change observed.
    pub min_change: T,
    /// Number of samples in which a change was detected.
    pub samples_with_changes: usize,
}
86
/// Analysis of distribution changes from transformations.
///
/// Derives `PartialEq` (when `T: PartialEq`) so results can be compared
/// directly, e.g. in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct DistributionChangeAnalysis<T> {
    /// Mean of the original (untransformed) data.
    pub original_mean: T,
    /// Mean of the transformed data.
    pub transformed_mean: T,
    /// Standard deviation of the original data.
    pub original_std: T,
    /// Standard deviation of the transformed data.
    pub transformed_std: T,
    /// Change in mean between original and transformed data (sign convention
    /// and absolute-vs-signed are set by the producer; confirm there).
    pub mean_change: T,
    /// Change in standard deviation between original and transformed data
    /// (same caveat as `mean_change`).
    pub std_change: T,
}
97
/// Comparison of an individual sample before and after transformation.
///
/// Derives `PartialEq` (when `T: PartialEq`) so comparisons can be checked
/// directly, e.g. in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct SampleComparison<T> {
    /// Index of the sample being compared.
    pub sample_index: usize,
    /// Statistics of the sample before the transformation.
    pub original_stats: TensorStats<T>,
    /// Statistics of the sample after the transformation.
    pub transformed_stats: TensorStats<T>,
    /// Magnitude of the change between the two versions (the metric used is
    /// defined by the producer, not by this type).
    pub change_magnitude: T,
}

/// Basic statistics for a tensor.
///
/// Derives `PartialEq` (when `T: PartialEq`) so values can be compared
/// directly and so that [`SampleComparison`] can derive it as well.
#[derive(Debug, Clone, PartialEq)]
pub struct TensorStats<T> {
    /// Mean of the tensor's elements.
    pub mean: T,
    /// Standard deviation of the tensor's elements.
    pub std: T,
    /// Smallest element value.
    pub min: T,
    /// Largest element value.
    pub max: T,
    /// Number of elements the statistics were computed over.
    pub element_count: usize,
}
115}