use std::sync::Arc;
use crate::connection::{ArraySchema, ColumnRef};
use crate::path_query::{ColumnPredicate, SortDirection};
/// Token and size accounting accumulated while a result is emitted.
///
/// Every counter starts at zero through the derived [`Default`].
#[derive(Debug, Clone, Default)]
pub struct ResultMetrics {
    /// Estimated token count of the emitted SOCH text.
    pub soch_tokens: usize,
    /// Estimated token count an equivalent JSON rendering would need.
    pub json_tokens_equivalent: usize,
    /// Number of lines counted as rows.
    pub rows_emitted: usize,
    /// Number of bytes counted as emitted.
    pub bytes_emitted: usize,
}

impl ResultMetrics {
    /// Percentage of tokens saved compared with the JSON-equivalent estimate.
    ///
    /// Returns `0.0` when `json_tokens_equivalent` is zero. The value is
    /// negative when `soch_tokens` exceeds `json_tokens_equivalent`.
    pub fn token_savings_percent(&self) -> f64 {
        // With a zero denominator the ratio is 1.0, which yields exactly 0.0 here.
        (1.0 - self.reduction_ratio()) * 100.0
    }

    /// Ratio `soch_tokens / json_tokens_equivalent`.
    ///
    /// Returns `1.0` (no reduction) when `json_tokens_equivalent` is zero, so
    /// the division is never performed with a zero denominator.
    pub fn reduction_ratio(&self) -> f64 {
        match self.json_tokens_equivalent {
            0 => 1.0,
            json_tokens => self.soch_tokens as f64 / json_tokens as f64,
        }
    }
}
/// A query result that renders a header line of the form
/// `path[0]{col1,col2}:` and accumulates [`ResultMetrics`] while doing so.
// `dead_code` is allowed because `schema`, `predicates`, `order_by`, `limit`,
// `offset` and `rows` are stored by the constructors but not read by any
// method visible in this file.
#[allow(dead_code)]
pub struct SochResult<'a> {
/// Path written at the start of the header line.
path: String,
/// Shared schema handle; stored only (not read in the visible code).
schema: Arc<ArraySchema>,
/// Columns whose names are listed in the header and by `column_names`.
columns: Vec<ColumnRef>,
/// Stored only — presumably the query's filters; verify against the caller.
predicates: Vec<ColumnPredicate>,
/// Stored only — presumably the sort column and direction; verify against the caller.
order_by: Option<(String, SortDirection)>,
/// Stored only — presumably a row cap; not applied anywhere in the visible code.
limit: Option<usize>,
/// Stored only — presumably a row skip count; not applied anywhere in the visible code.
offset: Option<usize>,
/// Counters updated by `update_metrics` for every emitted line.
metrics: ResultMetrics,
/// Initialised empty by both constructors; nothing visible appends to it.
rows: Vec<String>,
/// Set to `true` once `to_soch_string` has produced the header.
header_emitted: bool,
/// Budget set by `with_token_limit`; compared in `update_metrics`, where
/// exceeding it currently has no effect.
token_budget: Option<usize>,
/// Ties the struct to lifetime `'a` without storing a reference.
_marker: std::marker::PhantomData<&'a ()>,
}
impl<'a> SochResult<'a> {
    /// Creates a result for `path` over the given `columns`.
    ///
    /// `schema`, `predicates`, `order_by`, `limit` and `offset` are stored
    /// as-is. Metrics start at zero, no rows are held, the header has not
    /// been emitted, and no token budget is set.
    pub fn new(
        path: String,
        schema: Arc<ArraySchema>,
        columns: Vec<ColumnRef>,
        predicates: Vec<ColumnPredicate>,
        order_by: Option<(String, SortDirection)>,
        limit: Option<usize>,
        offset: Option<usize>,
    ) -> Self {
        Self {
            path,
            schema,
            columns,
            predicates,
            order_by,
            limit,
            offset,
            metrics: ResultMetrics::default(),
            rows: Vec::new(),
            header_emitted: false,
            token_budget: None,
            _marker: std::marker::PhantomData,
        }
    }

    /// Creates a result with an empty path, an empty schema and no columns,
    /// predicates, ordering or paging.
    pub fn empty() -> Self {
        // Delegate to `new` so the default field state is defined in one place.
        Self::new(
            String::new(),
            Arc::new(ArraySchema {
                name: String::new(),
                fields: vec![],
                types: vec![],
            }),
            vec![],
            vec![],
            None,
            None,
            None,
        )
    }

    /// Records `budget` as the token budget and returns the result.
    ///
    /// NOTE(review): the budget is compared against the running token count
    /// when metrics are updated, but exceeding it currently changes nothing.
    pub fn with_token_limit(mut self, budget: usize) -> Self {
        self.token_budget = Some(budget);
        self
    }

    /// Returns the metrics accumulated so far.
    pub fn metrics(&self) -> &ResultMetrics {
        &self.metrics
    }

    /// Returns the path this result was created with.
    pub fn path(&self) -> &str {
        &self.path
    }

    /// Returns the names of the selected columns, in order.
    pub fn column_names(&self) -> Vec<&str> {
        self.columns.iter().map(|c| c.name.as_str()).collect()
    }

    /// Returns the not-yet-emitted part of the textual output.
    ///
    /// The first call yields the header line followed by `'\n'`, updates the
    /// metrics with the header (without the newline) and marks the header as
    /// emitted. Every later call yields an empty string.
    pub fn to_soch_string(&mut self) -> String {
        if self.header_emitted {
            return String::new();
        }
        // Reuse the header allocation as the output buffer instead of copying it.
        let mut output = self.emit_header();
        self.update_metrics(&output);
        output.push('\n');
        self.header_emitted = true;
        output
    }

    /// Writes the output of [`Self::to_soch_string`] to `writer`.
    ///
    /// # Errors
    ///
    /// Returns any error reported by `writer.write_all`.
    pub fn stream_to<W: std::io::Write>(&mut self, writer: &mut W) -> std::io::Result<()> {
        let output = self.to_soch_string();
        writer.write_all(output.as_bytes())
    }

    /// Consumes the result and returns its lines.
    ///
    /// Currently this is only the header line (no trailing newline). Metrics
    /// are not updated.
    pub fn collect(self) -> Vec<String> {
        // Share the header formatting with `to_soch_string` so the two cannot drift.
        vec![self.emit_header()]
    }

    /// Formats the header line as `path[0]{col1,col2,...}:`.
    ///
    /// NOTE(review): the bracketed count is the literal `0`; no row count is
    /// consulted here.
    fn emit_header(&self) -> String {
        format!(
            "{}[{}]{{{}}}:",
            self.path,
            0,
            self.column_names().join(",")
        )
    }

    /// Adds the token and byte estimates for `line` to the running metrics.
    fn update_metrics(&mut self, line: &str) {
        let soch_tokens = estimate_tokens(line);
        let json_tokens = estimate_json_equivalent_tokens(line, &self.column_names());
        self.metrics.soch_tokens += soch_tokens;
        self.metrics.json_tokens_equivalent += json_tokens;
        self.metrics.bytes_emitted += line.len();

        // Heuristic: the first line containing '{' is taken to be the header
        // and is not counted as a row; every other line is.
        if self.metrics.rows_emitted > 0 || !line.contains('{') {
            self.metrics.rows_emitted += 1;
        }

        let over_budget = self
            .token_budget
            .is_some_and(|budget| self.metrics.soch_tokens > budget);
        if over_budget {
            // NOTE(review): budget enforcement is not implemented. Exceeding
            // the budget is detected here but has no effect yet.
        }
    }
}
/// Estimates the token count of `s` as its UTF-8 byte length divided by
/// four, rounded up. An empty string yields zero.
fn estimate_tokens(s: &str) -> usize {
    const BYTES_PER_TOKEN: usize = 4;
    usize::div_ceil(s.len(), BYTES_PER_TOKEN)
}
/// Estimates how many tokens `line` would cost if rendered as JSON with
/// `fields` as keys.
///
/// The result is the token estimate of `line` itself plus, for the keys, the
/// total of `len + 5` bytes per field divided by four and rounded up.
fn estimate_json_equivalent_tokens(line: &str, fields: &[&str]) -> usize {
    // Presumably the quotes, colon and separator around each key — TODO confirm.
    const PER_FIELD_OVERHEAD_BYTES: usize = 5;

    let key_bytes: usize = fields.iter().map(|name| name.len()).sum();
    let overhead_bytes = key_bytes + PER_FIELD_OVERHEAD_BYTES * fields.len();
    estimate_tokens(line) + overhead_bytes.div_ceil(4)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::connection::FieldType;

    /// Half the JSON-equivalent tokens means 50% savings and a 0.5 ratio.
    #[test]
    fn test_result_metrics() {
        let halved = ResultMetrics {
            soch_tokens: 100,
            json_tokens_equivalent: 200,
            ..Default::default()
        };

        let savings = halved.token_savings_percent();
        let ratio = halved.reduction_ratio();

        assert!((savings - 50.0).abs() < 0.1);
        assert!((ratio - 0.5).abs() < 0.1);
    }

    /// An empty result has neither a path nor any columns.
    #[test]
    fn test_empty_result() {
        let blank = SochResult::empty();

        assert!(blank.path().is_empty());
        assert!(blank.column_names().is_empty());
    }

    /// Token estimates round the byte length up to the next multiple of four.
    #[test]
    fn test_token_estimation() {
        assert_eq!(estimate_tokens("hello"), 2);
        assert_eq!(estimate_tokens("hello world test"), 4);
    }

    /// Column names are reported in the order the columns were supplied.
    #[test]
    fn test_result_with_columns() {
        let users_schema = Arc::new(ArraySchema {
            name: "users".to_string(),
            fields: vec!["id".to_string(), "name".to_string()],
            types: vec![FieldType::UInt64, FieldType::Text],
        });

        let id_column = ColumnRef {
            id: 0,
            name: "id".to_string(),
            field_type: FieldType::UInt64,
        };
        let name_column = ColumnRef {
            id: 1,
            name: "name".to_string(),
            field_type: FieldType::Text,
        };

        let users = SochResult::new(
            "users".to_string(),
            users_schema,
            vec![id_column, name_column],
            vec![],
            None,
            None,
            None,
        );

        assert_eq!(users.column_names(), vec!["id", "name"]);
    }
}