use std::sync::Arc;
use ansi_to_tui::IntoText;
use memchr::memchr;
use ratatui::text::Text;
use serde_json::Value;
use tokio_util::sync::CancellationToken;
use super::types::{ProcessedResult, QueryError, RenderedLine, RenderedSpan};
use crate::query::query_state::ResultType;
/// Runs the full post-processing pipeline over raw query output.
///
/// The stages are, in order: strip ANSI escape codes, compute per-line
/// metrics on the stripped text, convert the coloured text into rendered
/// lines, parse the stripped text as JSON while detecting its result type,
/// and normalize `query` into its base form.
///
/// `array_sample_size` is forwarded to [`parse_and_detect_type`].
/// `execution_time_ms` is always left as `None` here.
///
/// # Errors
///
/// Returns [`QueryError::Cancelled`] if `cancel_token` is observed to be
/// cancelled before any of the first four stages, or while the rendered
/// lines are being built.
pub fn preprocess_result(
    output: String,
    query: &str,
    cancel_token: &CancellationToken,
    array_sample_size: usize,
) -> Result<ProcessedResult, QueryError> {
    // Single place for the cancellation guard that separates the stages.
    let ensure_active = || -> Result<(), QueryError> {
        if cancel_token.is_cancelled() {
            Err(QueryError::Cancelled)
        } else {
            Ok(())
        }
    };

    ensure_active()?;
    let unformatted = strip_ansi_codes(&output);

    ensure_active()?;
    let (line_count, max_width, line_widths, is_only_nulls) = compute_line_metrics(&unformatted);

    ensure_active()?;
    let rendered_lines = parse_ansi_to_rendered_lines(&output, cancel_token)?;

    ensure_active()?;
    let (parsed_value, result_type) = parse_and_detect_type(&unformatted, array_sample_size);

    Ok(ProcessedResult {
        parsed: parsed_value.map(Arc::new),
        query: normalize_base_query(query),
        output: Arc::new(output),
        unformatted: Arc::new(unformatted),
        rendered_lines,
        line_count,
        max_width,
        line_widths,
        result_type,
        execution_time_ms: None,
        is_only_nulls,
    })
}
/// Computes per-line statistics for `output`.
///
/// Returns `(line_count, max_width, line_widths, is_only_nulls)` where:
/// - `line_count` is the number of lines yielded by `str::lines`;
/// - `line_widths` holds each line's byte length, saturated at `u16::MAX`;
/// - `max_width` is the largest entry of `line_widths` (0 when empty);
/// - `is_only_nulls` is `true` when every line, after trimming whitespace,
///   is either empty or exactly `null` (vacuously `true` for empty input).
fn compute_line_metrics(output: &str) -> (u32, u16, Arc<Vec<u16>>, bool) {
    const WIDTH_CAP: usize = u16::MAX as usize;

    let mut total_lines: u32 = 0;
    let mut only_nulls = true;
    let mut per_line: Vec<u16> = Vec::new();

    for line in output.lines() {
        total_lines += 1;
        // Widths are byte lengths, clamped so they fit in a u16.
        per_line.push(line.len().min(WIDTH_CAP) as u16);

        let content = line.trim();
        only_nulls &= content.is_empty() || content == "null";
    }

    let widest = per_line.iter().copied().max().unwrap_or(0);
    (total_lines, widest, Arc::new(per_line), only_nulls)
}
/// Converts ANSI-coloured `output` into a list of [`RenderedLine`]s.
///
/// The text is parsed with `ansi_to_tui`; if that parse fails, the raw
/// string is used as-is. Each resulting span is copied into a
/// [`RenderedSpan`] carrying owned content and the span's style.
///
/// # Errors
///
/// Returns [`QueryError::Cancelled`] if `cancel_token` is cancelled on entry
/// or when it is polled during conversion (every 10000th line).
fn parse_ansi_to_rendered_lines(
    output: &str,
    cancel_token: &CancellationToken,
) -> Result<Vec<RenderedLine>, QueryError> {
    if cancel_token.is_cancelled() {
        return Err(QueryError::Cancelled);
    }

    let text: Text = match output.as_bytes().into_text() {
        Ok(styled) => styled,
        // Fall back to the raw text when the ANSI parse fails.
        Err(_) => Text::raw(output.to_string()),
    };

    let mut rendered = Vec::with_capacity(text.lines.len());
    for (line_no, line) in text.lines.into_iter().enumerate() {
        // Poll the token only occasionally to keep the loop cheap.
        if line_no % 10000 == 0 && cancel_token.is_cancelled() {
            return Err(QueryError::Cancelled);
        }

        let spans = line
            .spans
            .into_iter()
            .map(|piece| RenderedSpan {
                content: piece.content.to_string(),
                style: piece.style,
            })
            .collect();
        rendered.push(RenderedLine { spans });
    }

    Ok(rendered)
}
/// Returns a copy of `s` with escape sequences removed.
///
/// Each ESC byte (`0x1b`) and whatever `skip_csi_sequence` reports as
/// belonging to it is dropped; all other text is copied through unchanged.
pub fn strip_ansi_codes(s: &str) -> String {
    let bytes = s.as_bytes();
    let mut result = String::with_capacity(s.len());
    let mut pos = 0;
    while let Some(esc_offset) = memchr(b'\x1b', &bytes[pos..]) {
        let esc_pos = pos + esc_offset;
        // Every cut point sits on a UTF-8 char boundary: `esc_pos` is an
        // ASCII ESC byte, and `pos` is 0, one past an ASCII byte, or the end
        // of the input. Slicing the `&str` keeps this function safe, with no
        // need for `String::from_utf8_unchecked`.
        result.push_str(&s[pos..esc_pos]);
        pos = skip_csi_sequence(bytes, esc_pos);
    }
    result.push_str(&s[pos..]);
    result
}
/// Returns the index of the first byte after the escape sequence that begins
/// at `start`.
///
/// `bytes[start]` is expected to be the ESC byte. If it is followed by `[`
/// (a CSI introducer), the sequence runs through its final byte, which is
/// the first byte in `0x40..=0x7E` (for colour codes that byte is `m`). An
/// unterminated CSI sequence extends to the end of `bytes`. If ESC is not
/// followed by `[`, only the ESC byte itself is skipped.
///
/// The returned index is always `start + 1`, one past an ASCII byte, or
/// `bytes.len()`, so it never falls inside a multi-byte UTF-8 character.
fn skip_csi_sequence(bytes: &[u8], start: usize) -> usize {
    let after_esc = start + 1;
    if bytes.get(after_esc) != Some(&b'[') {
        return after_esc;
    }

    // Stop at any CSI final byte, not just `m`; otherwise a sequence such as
    // `ESC[2K` would swallow all following text up to the next `m`.
    let params_start = after_esc + 1;
    bytes[params_start..]
        .iter()
        .position(|byte| (0x40..=0x7e).contains(byte))
        .map_or(bytes.len(), |offset| params_start + offset + 1)
}
pub fn parse_and_detect_type(text: &str, array_sample_size: usize) -> (Option<Value>, ResultType) {
let text = text.trim();
if text.is_empty() {
return (None, ResultType::Null);
}
if let Ok(value) = serde_json::from_str::<Value>(text) {
let result_type = value_to_result_type(&value, false);
return (Some(value), result_type);
}
let mut deserializer = serde_json::Deserializer::from_str(text).into_iter();
let first_value = match deserializer.next() {
Some(Ok(v)) => v,
_ => return (None, ResultType::Null),
};
let second_value = deserializer.next();
let has_multiple = second_value.is_some();
let mut result_type = value_to_result_type(&first_value, has_multiple);
if has_multiple
&& !matches!(
result_type,
ResultType::DestructuredObjects | ResultType::ArrayOfObjects
)
&& let Some(Ok(ref second)) = second_value
{
match second {
Value::Object(_) => result_type = ResultType::DestructuredObjects,
Value::Array(arr) if matches!(arr.first(), Some(Value::Object(_))) => {
result_type = ResultType::ArrayOfObjects;
}
_ => {}
}
}
if result_type == ResultType::DestructuredObjects {
let mut merged = match first_value {
Value::Object(map) => map,
_ => serde_json::Map::new(),
};
if let Some(Ok(Value::Object(map))) = second_value {
for (key, val) in map {
merged.entry(key).or_insert(val);
}
}
let remaining = array_sample_size.saturating_sub(2);
for result in deserializer.take(remaining) {
if let Ok(Value::Object(map)) = result {
for (key, val) in map {
merged.entry(key).or_insert(val);
}
}
}
return (Some(Value::Object(merged)), result_type);
}
if has_multiple && result_type == ResultType::ArrayOfObjects {
let mut merged = match first_value {
Value::Array(arr) => arr,
_ => Vec::new(),
};
if let Some(Ok(Value::Array(arr))) = second_value {
merged.extend(arr);
}
let remaining = array_sample_size.saturating_sub(2);
for result in deserializer.take(remaining) {
if let Ok(Value::Array(arr)) = result {
merged.extend(arr);
}
}
return (Some(Value::Array(merged)), result_type);
}
(Some(first_value), result_type)
}
/// Maps a JSON `value` to its [`ResultType`].
///
/// An object is reported as `DestructuredObjects` when `has_multiple` is
/// set and as `Object` otherwise. An array is `ArrayOfObjects` only when its
/// first element is an object; empty arrays and all other arrays are
/// `Array`. `has_multiple` does not affect any non-object value.
fn value_to_result_type(value: &Value, has_multiple: bool) -> ResultType {
    match value {
        Value::Null => ResultType::Null,
        Value::Bool(_) => ResultType::Boolean,
        Value::Number(_) => ResultType::Number,
        Value::String(_) => ResultType::String,
        Value::Array(items) => match items.first() {
            Some(Value::Object(_)) => ResultType::ArrayOfObjects,
            _ => ResultType::Array,
        },
        Value::Object(_) => {
            if has_multiple {
                ResultType::DestructuredObjects
            } else {
                ResultType::Object
            }
        }
    }
}
/// Reduces `query` to its base form by dropping one incomplete trailing
/// fragment.
///
/// After trimming trailing whitespace, exactly one of these applies (first
/// match wins):
/// - a trailing `" | ."` is removed, then trailing whitespace is trimmed;
/// - a trailing `" |"` is removed, then trailing whitespace is trimmed;
/// - a trailing `.` is removed, unless the query is just `"."`.
///
/// Otherwise the trimmed query is returned unchanged.
fn normalize_base_query(query: &str) -> String {
    let trimmed = query.trim_end();

    let core = if let Some(head) = trimmed.strip_suffix(" | .") {
        head.trim_end()
    } else if let Some(head) = trimmed.strip_suffix(" |") {
        head.trim_end()
    } else {
        match trimmed.strip_suffix('.') {
            // A lone "." is the identity query and is kept as-is.
            Some(head) if trimmed.len() > 1 => head,
            _ => trimmed,
        }
    };

    core.to_string()
}
#[cfg(test)]
#[path = "preprocess_tests.rs"]
mod preprocess_tests;