term_guard/analyzers/
traits.rs

1//! Core analyzer traits for the Term framework.
2
3use async_trait::async_trait;
4use datafusion::prelude::*;
5use serde::{Deserialize, Serialize};
6use std::fmt::Debug;
7
8use super::errors::AnalyzerResult;
9use super::types::MetricValue;
10
11/// Core trait for analyzers that compute metrics from data.
12///
13/// Analyzers support incremental computation through state management,
14/// enabling efficient processing of large datasets and parallel execution.
15///
16/// # Type Parameters
17///
18/// * `State` - The state type that holds intermediate computation results
19/// * `Metric` - The final metric type produced by this analyzer
20///
21/// # Example
22///
23/// ```rust,ignore
/// use term_guard::analyzers::{Analyzer, AnalyzerResult, AnalyzerState, MetricValue};
/// use async_trait::async_trait;
/// use datafusion::prelude::*;
/// use serde::{Serialize, Deserialize};
///
/// #[derive(Debug, Clone, Serialize, Deserialize)]
/// struct SizeState {
///     count: u64,
/// }
///
/// impl AnalyzerState for SizeState {
///     fn merge(states: Vec<Self>) -> AnalyzerResult<Self> {
///         let total_count = states.iter().map(|s| s.count).sum();
///         Ok(SizeState { count: total_count })
///     }
/// }
///
/// // `Analyzer` has `Debug` as a supertrait, so implementors must provide it.
/// #[derive(Debug)]
/// struct SizeAnalyzer;
///
/// #[async_trait]
/// impl Analyzer for SizeAnalyzer {
///     type State = SizeState;
///     type Metric = MetricValue;
///
///     async fn compute_state_from_data(&self, ctx: &SessionContext) -> AnalyzerResult<Self::State> {
///         let df = ctx.sql("SELECT COUNT(*) as count FROM data").await?;
///         let batches = df.collect().await?;
///         // Extract count from batches...
///         Ok(SizeState { count: 42 })
///     }
///
///     fn compute_metric_from_state(&self, state: &Self::State) -> AnalyzerResult<Self::Metric> {
///         Ok(MetricValue::Long(state.count as i64))
///     }
///
///     fn name(&self) -> &str {
///         "size"
///     }
/// }
63/// ```
64#[async_trait]
65pub trait Analyzer: Send + Sync + Debug {
66    /// The state type for incremental computation.
67    type State: AnalyzerState;
68
69    /// The metric type produced by this analyzer.
70    type Metric: Into<MetricValue> + Send + Sync + Debug;
71
72    /// Computes the state from the input data.
73    ///
74    /// This method performs the main computation, extracting intermediate
75    /// results that can be merged with other states for parallel processing.
76    ///
77    /// # Arguments
78    ///
79    /// * `ctx` - The DataFusion session context with registered data tables
80    ///
81    /// # Returns
82    ///
83    /// The computed state or an error if computation fails
84    async fn compute_state_from_data(&self, ctx: &SessionContext) -> AnalyzerResult<Self::State>;
85
86    /// Computes the final metric from the accumulated state.
87    ///
88    /// This method transforms the intermediate state into the final metric value.
89    ///
90    /// # Arguments
91    ///
92    /// * `state` - The accumulated state from one or more data computations
93    ///
94    /// # Returns
95    ///
96    /// The final metric value or an error if transformation fails
97    fn compute_metric_from_state(&self, state: &Self::State) -> AnalyzerResult<Self::Metric>;
98
99    /// Merges multiple states into a single state.
100    ///
101    /// This method enables parallel computation by allowing states computed
102    /// from different data partitions to be combined.
103    ///
104    /// # Arguments
105    ///
106    /// * `states` - A vector of states to merge
107    ///
108    /// # Returns
109    ///
110    /// The merged state or an error if merging fails
111    fn merge_states(&self, states: Vec<Self::State>) -> AnalyzerResult<Self::State> {
112        Self::State::merge(states)
113    }
114
115    /// Returns the name of this analyzer.
116    ///
117    /// Used for identification in results and debugging.
118    fn name(&self) -> &str;
119
120    /// Returns a description of what this analyzer computes.
121    ///
122    /// Used for documentation and error messages.
123    fn description(&self) -> &str {
124        ""
125    }
126
127    /// Returns the metric key for storing results.
128    ///
129    /// By default, this returns the analyzer name, but column-based
130    /// analyzers should override this to include the column name.
131    fn metric_key(&self) -> String {
132        self.name().to_string()
133    }
134
135    /// Returns the column(s) this analyzer operates on, if any.
136    ///
137    /// Used for optimization and dependency analysis.
138    fn columns(&self) -> Vec<&str> {
139        vec![]
140    }
141
142    /// Indicates whether this analyzer can be combined with others.
143    ///
144    /// Some analyzers may have complex logic that prevents efficient combination.
145    fn is_combinable(&self) -> bool {
146        true
147    }
148}
149
150/// Trait for analyzer state that supports incremental computation.
151///
152/// States must be serializable to support distributed computation
153/// and caching of intermediate results.
154pub trait AnalyzerState:
155    Clone + Send + Sync + Debug + Serialize + for<'de> Deserialize<'de>
156{
157    /// Merges multiple states into a single state.
158    ///
159    /// This enables parallel computation where states are computed
160    /// independently on data partitions and then combined.
161    ///
162    /// # Arguments
163    ///
164    /// * `states` - States to merge together
165    ///
166    /// # Returns
167    ///
168    /// The merged state or an error if states cannot be merged
169    fn merge(states: Vec<Self>) -> AnalyzerResult<Self>
170    where
171        Self: Sized;
172
173    /// Returns whether this state represents an empty computation.
174    ///
175    /// Used to optimize away empty states during merging.
176    fn is_empty(&self) -> bool {
177        false
178    }
179}