term_guard/analyzers/traits.rs
1//! Core analyzer traits for the Term framework.
2
3use async_trait::async_trait;
4use datafusion::prelude::*;
5use serde::{Deserialize, Serialize};
6use std::fmt::Debug;
7
8use super::errors::AnalyzerResult;
9use super::types::MetricValue;
10
11/// Core trait for analyzers that compute metrics from data.
12///
13/// Analyzers support incremental computation through state management,
14/// enabling efficient processing of large datasets and parallel execution.
15///
16/// # Type Parameters
17///
18/// * `State` - The state type that holds intermediate computation results
19/// * `Metric` - The final metric type produced by this analyzer
20///
21/// # Example
22///
23/// ```rust,ignore
24/// use term_guard::analyzers::{Analyzer, AnalyzerState, MetricValue};
25/// use async_trait::async_trait;
26/// use datafusion::prelude::*;
27/// use serde::{Serialize, Deserialize};
28///
29/// #[derive(Debug, Clone, Serialize, Deserialize)]
30/// struct SizeState {
31/// count: u64,
32/// }
33///
34/// impl AnalyzerState for SizeState {
35/// fn merge(states: Vec<Self>) -> Result<Self, Box<dyn std::error::Error>> {
36/// let total_count = states.iter().map(|s| s.count).sum();
37/// Ok(SizeState { count: total_count })
38/// }
39/// }
40///
41/// struct SizeAnalyzer;
42///
43/// #[async_trait]
44/// impl Analyzer for SizeAnalyzer {
45/// type State = SizeState;
46/// type Metric = MetricValue;
47///
48/// async fn compute_state_from_data(&self, ctx: &SessionContext) -> Result<Self::State> {
49/// let df = ctx.sql("SELECT COUNT(*) as count FROM data").await?;
50/// let batches = df.collect().await?;
51/// // Extract count from batches...
52/// Ok(SizeState { count: 42 })
53/// }
54///
55/// fn compute_metric_from_state(&self, state: &Self::State) -> Result<Self::Metric> {
56/// Ok(MetricValue::Long(state.count as i64))
57/// }
58///
59/// fn name(&self) -> &str {
60/// "size"
61/// }
62/// }
63/// ```
64#[async_trait]
65pub trait Analyzer: Send + Sync + Debug {
66 /// The state type for incremental computation.
67 type State: AnalyzerState;
68
69 /// The metric type produced by this analyzer.
70 type Metric: Into<MetricValue> + Send + Sync + Debug;
71
72 /// Computes the state from the input data.
73 ///
74 /// This method performs the main computation, extracting intermediate
75 /// results that can be merged with other states for parallel processing.
76 ///
77 /// # Arguments
78 ///
79 /// * `ctx` - The DataFusion session context with registered data tables
80 ///
81 /// # Returns
82 ///
83 /// The computed state or an error if computation fails
84 async fn compute_state_from_data(&self, ctx: &SessionContext) -> AnalyzerResult<Self::State>;
85
86 /// Computes the final metric from the accumulated state.
87 ///
88 /// This method transforms the intermediate state into the final metric value.
89 ///
90 /// # Arguments
91 ///
92 /// * `state` - The accumulated state from one or more data computations
93 ///
94 /// # Returns
95 ///
96 /// The final metric value or an error if transformation fails
97 fn compute_metric_from_state(&self, state: &Self::State) -> AnalyzerResult<Self::Metric>;
98
99 /// Merges multiple states into a single state.
100 ///
101 /// This method enables parallel computation by allowing states computed
102 /// from different data partitions to be combined.
103 ///
104 /// # Arguments
105 ///
106 /// * `states` - A vector of states to merge
107 ///
108 /// # Returns
109 ///
110 /// The merged state or an error if merging fails
111 fn merge_states(&self, states: Vec<Self::State>) -> AnalyzerResult<Self::State> {
112 Self::State::merge(states)
113 }
114
115 /// Returns the name of this analyzer.
116 ///
117 /// Used for identification in results and debugging.
118 fn name(&self) -> &str;
119
120 /// Returns a description of what this analyzer computes.
121 ///
122 /// Used for documentation and error messages.
123 fn description(&self) -> &str {
124 ""
125 }
126
127 /// Returns the metric key for storing results.
128 ///
129 /// By default, this returns the analyzer name, but column-based
130 /// analyzers should override this to include the column name.
131 fn metric_key(&self) -> String {
132 self.name().to_string()
133 }
134
135 /// Returns the column(s) this analyzer operates on, if any.
136 ///
137 /// Used for optimization and dependency analysis.
138 fn columns(&self) -> Vec<&str> {
139 vec![]
140 }
141
142 /// Indicates whether this analyzer can be combined with others.
143 ///
144 /// Some analyzers may have complex logic that prevents efficient combination.
145 fn is_combinable(&self) -> bool {
146 true
147 }
148}
149
150/// Trait for analyzer state that supports incremental computation.
151///
152/// States must be serializable to support distributed computation
153/// and caching of intermediate results.
154pub trait AnalyzerState:
155 Clone + Send + Sync + Debug + Serialize + for<'de> Deserialize<'de>
156{
157 /// Merges multiple states into a single state.
158 ///
159 /// This enables parallel computation where states are computed
160 /// independently on data partitions and then combined.
161 ///
162 /// # Arguments
163 ///
164 /// * `states` - States to merge together
165 ///
166 /// # Returns
167 ///
168 /// The merged state or an error if states cannot be merged
169 fn merge(states: Vec<Self>) -> AnalyzerResult<Self>
170 where
171 Self: Sized;
172
173 /// Returns whether this state represents an empty computation.
174 ///
175 /// Used to optimize away empty states during merging.
176 fn is_empty(&self) -> bool {
177 false
178 }
179}