scouter_profile/profile/
types.rs

1use chrono::Utc;
2use core::fmt::Debug;
3use pyo3::prelude::*;
4use scouter_types::error::UtilError;
5
6use scouter_types::{FileName, PyHelperFuncs};
7use serde::{Deserialize, Serialize};
8use std::collections::BTreeMap;
9use std::collections::HashMap;
10use std::path::PathBuf;
11
12#[pyclass]
13#[derive(Debug, Serialize, Deserialize, Clone)]
14pub struct Distinct {
15    #[pyo3(get)]
16    pub count: usize,
17
18    #[pyo3(get)]
19    pub percent: f64,
20}
21
22#[pyclass]
23#[derive(Debug, Serialize, Deserialize, Clone)]
24pub struct NumericStats {
25    #[pyo3(get)]
26    pub mean: f64,
27
28    #[pyo3(get)]
29    pub stddev: f64,
30
31    #[pyo3(get)]
32    pub min: f64,
33
34    #[pyo3(get)]
35    pub max: f64,
36
37    #[pyo3(get)]
38    pub distinct: Distinct,
39
40    #[pyo3(get)]
41    pub quantiles: Quantiles,
42
43    #[pyo3(get)]
44    pub histogram: Histogram,
45}
46
47#[pyclass]
48#[derive(Debug, Serialize, Deserialize, Clone)]
49pub struct CharStats {
50    #[pyo3(get)]
51    pub min_length: usize,
52
53    #[pyo3(get)]
54    pub max_length: usize,
55
56    #[pyo3(get)]
57    pub median_length: usize,
58
59    #[pyo3(get)]
60    pub mean_length: f64,
61}
62
63#[pyclass]
64#[derive(Debug, Serialize, Deserialize, Clone)]
65pub struct WordStats {
66    #[pyo3(get)]
67    pub words: HashMap<String, Distinct>,
68}
69
70#[pyclass]
71#[derive(Debug, Serialize, Deserialize, Clone)]
72pub struct StringStats {
73    #[pyo3(get)]
74    pub distinct: Distinct,
75
76    #[pyo3(get)]
77    pub char_stats: CharStats,
78
79    #[pyo3(get)]
80    pub word_stats: WordStats,
81}
82
83#[pyclass]
84#[derive(Debug, Serialize, Deserialize, Clone)]
85pub struct FeatureProfile {
86    #[pyo3(get)]
87    pub id: String,
88
89    #[pyo3(get)]
90    pub numeric_stats: Option<NumericStats>,
91
92    #[pyo3(get)]
93    pub string_stats: Option<StringStats>,
94
95    #[pyo3(get)]
96    pub timestamp: chrono::DateTime<Utc>,
97
98    #[pyo3(get)]
99    pub correlations: Option<HashMap<String, f32>>,
100}
101
102#[pymethods]
103impl FeatureProfile {
104    pub fn __str__(&self) -> String {
105        // serialize the struct to a string
106        PyHelperFuncs::__str__(self)
107    }
108}
109
110impl FeatureProfile {
111    pub fn add_correlations(&mut self, correlations: HashMap<String, f32>) {
112        self.correlations = Some(correlations);
113    }
114}
115
116#[pyclass]
117#[derive(Debug, Serialize, Deserialize, Clone)]
118pub struct DataProfile {
119    #[pyo3(get)]
120    pub features: BTreeMap<String, FeatureProfile>,
121}
122
123#[pymethods]
124impl DataProfile {
125    pub fn __str__(&self) -> String {
126        // serialize the struct to a string
127        PyHelperFuncs::__str__(self)
128    }
129
130    pub fn model_dump_json(&self) -> String {
131        // serialize the struct to a string
132        PyHelperFuncs::__json__(self)
133    }
134
135    #[staticmethod]
136    pub fn model_validate_json(json_string: String) -> Result<DataProfile, UtilError> {
137        // deserialize the string to a struct
138        Ok(serde_json::from_str(&json_string)?)
139    }
140
141    #[pyo3(signature = (path=None))]
142    pub fn save_to_json(&self, path: Option<PathBuf>) -> Result<PathBuf, UtilError> {
143        PyHelperFuncs::save_to_json(self, path, FileName::DataProfile.to_str())
144    }
145}
146
147/// Python class for quantiles
148///
149/// # Arguments
150///
151/// * `quant_25` - The 25th percentile
152/// * `quant_50` - The 50th percentile
153/// * `quant_75` - The 75th percentile
154/// * `quant_99` - The 99th percentile
155///
156///
157#[pyclass]
158#[derive(Debug, Serialize, Deserialize, Clone)]
159pub struct Quantiles {
160    #[pyo3(get)]
161    pub q25: f64,
162
163    #[pyo3(get)]
164    pub q50: f64,
165
166    #[pyo3(get)]
167    pub q75: f64,
168
169    #[pyo3(get)]
170    pub q99: f64,
171}
172
173/// Python class for a feature histogram
174///
175/// # Arguments
176///
177/// * `bins` - A vector of bins
178/// * `bin_counts` - A vector of bin counts
179///
180#[pyclass]
181#[derive(Debug, Serialize, Deserialize, Clone, Default)]
182pub struct Histogram {
183    #[pyo3(get)]
184    pub bins: Vec<f64>,
185
186    #[pyo3(get)]
187    pub bin_counts: Vec<i32>,
188}