use serde::Serialize;
/// Output layout selectable for machine-readable output.
///
/// Each variant has one canonical name (see [`OutputFormat::name`]) and may be
/// reached through additional aliases when parsing. Parsing is
/// case-insensitive. The default variant is [`OutputFormat::Json`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OutputFormat {
    /// Canonical name `"json"`.
    #[default]
    Json,
    /// Canonical name `"jsonl"`; alias `"ndjson"`.
    Jsonl,
    /// Canonical name `"minimal"`; alias `"compact"`.
    Minimal,
    /// Canonical name `"table"`; aliases `"human"` and `"text"`.
    Table,
}

impl OutputFormat {
    /// Single source of truth for the name/alias table.
    ///
    /// Both the lenient and the strict parser go through this lookup so the
    /// two can never disagree about which spellings are accepted.
    fn from_alias(s: &str) -> Option<Self> {
        match s.to_lowercase().as_str() {
            "json" => Some(OutputFormat::Json),
            "jsonl" | "ndjson" => Some(OutputFormat::Jsonl),
            "minimal" | "compact" => Some(OutputFormat::Minimal),
            "table" | "human" | "text" => Some(OutputFormat::Table),
            _ => None,
        }
    }

    /// Parses a format name leniently.
    ///
    /// Accepts the same names and aliases as the [`std::str::FromStr`]
    /// implementation, but never fails: unrecognized input yields the default
    /// format ([`OutputFormat::Json`]).
    pub fn from_str_loose(s: &str) -> Self {
        Self::from_alias(s).unwrap_or_default()
    }

    /// Returns the canonical lowercase name of this format.
    pub fn name(&self) -> &'static str {
        match self {
            OutputFormat::Json => "json",
            OutputFormat::Jsonl => "jsonl",
            OutputFormat::Minimal => "minimal",
            OutputFormat::Table => "table",
        }
    }
}

impl std::str::FromStr for OutputFormat {
    type Err = String;

    /// Parses a format name strictly.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message listing the canonical format names
    /// when `s` is not a known name or alias.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::from_alias(s).ok_or_else(|| {
            format!(
                "Unknown format '{}'. Valid formats: json, jsonl, minimal, table",
                s
            )
        })
    }
}
/// Selection of which fields to include in output.
///
/// The three named presets carry no data; what each preset actually includes
/// is decided by whoever consumes this value. The default is
/// [`FieldMode::Full`].
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum FieldMode {
    /// Preset parsed from `"full"`; also the fallback for unknown input.
    #[default]
    Full,
    /// Preset parsed from `"summary"`.
    Summary,
    /// Preset parsed from `"minimal"`.
    Minimal,
    /// Explicit list of field names, parsed from `"custom:a,b,c"`.
    Custom(Vec<String>),
}

impl FieldMode {
    /// Parses a field mode leniently; never fails.
    ///
    /// * `"custom:<a>,<b>,..."` yields [`FieldMode::Custom`] with each entry
    ///   trimmed and empty entries dropped. Field names keep their original
    ///   case.
    /// * `"full"`, `"summary"` and `"minimal"` select the matching preset.
    /// * Anything else yields [`FieldMode::Full`].
    ///
    /// All keywords, including the `custom:` prefix, are matched
    /// case-insensitively so that `"CUSTOM:title"` is not silently treated as
    /// `Full` while `"FULL"` is accepted.
    pub fn from_str_loose(s: &str) -> Self {
        const PREFIX: &str = "custom:";

        // `str::get` returns `None` when `s` is shorter than the prefix or when
        // the cut would land inside a multi-byte character, so arbitrary input
        // cannot cause a slicing panic here.
        let custom_list = s
            .get(..PREFIX.len())
            .filter(|head| head.eq_ignore_ascii_case(PREFIX))
            .map(|_| &s[PREFIX.len()..]);

        if let Some(list) = custom_list {
            let fields = list
                .split(',')
                .map(str::trim)
                .filter(|field| !field.is_empty())
                .map(String::from)
                .collect();
            return FieldMode::Custom(fields);
        }

        match s.to_lowercase().as_str() {
            "full" => FieldMode::Full,
            "summary" => FieldMode::Summary,
            "minimal" => FieldMode::Minimal,
            _ => FieldMode::Full,
        }
    }
}
/// Settings that control machine-readable ("robot") output.
#[derive(Debug, Clone)]
pub struct RobotConfig {
    /// Output layout to produce.
    pub format: OutputFormat,
    /// Optional token budget; `None` means no budget.
    pub max_tokens: Option<usize>,
    /// Optional cap on the number of results; `None` means no cap.
    /// NOTE(review): nothing in this file enforces it — presumably the caller does.
    pub max_results: Option<usize>,
    /// Optional content length limit; `None` means no limit.
    pub max_content_length: Option<usize>,
    /// Which fields to include in output.
    pub fields: FieldMode,
    /// Whether robot mode is switched on.
    pub enabled: bool,
}

impl Default for RobotConfig {
    /// Robot mode **disabled**, JSON format, full fields, at most 10 results,
    /// and no token or content-length limits.
    fn default() -> Self {
        RobotConfig {
            enabled: false,
            format: OutputFormat::Json,
            fields: FieldMode::Full,
            max_results: Some(10),
            max_tokens: None,
            max_content_length: None,
        }
    }
}

impl RobotConfig {
    /// Creates a configuration with robot mode **enabled** and every other
    /// setting taken from [`RobotConfig::default`].
    pub fn new() -> Self {
        let defaults = Self::default();
        Self {
            enabled: true,
            ..defaults
        }
    }

    /// Returns the configuration with the output format replaced.
    pub fn with_format(self, format: OutputFormat) -> Self {
        Self { format, ..self }
    }

    /// Returns the configuration with a token budget set.
    pub fn with_max_tokens(self, max_tokens: usize) -> Self {
        Self {
            max_tokens: Some(max_tokens),
            ..self
        }
    }

    /// Returns the configuration with a result cap set.
    pub fn with_max_results(self, max_results: usize) -> Self {
        Self {
            max_results: Some(max_results),
            ..self
        }
    }

    /// Returns the configuration with a content length limit set.
    pub fn with_max_content_length(self, max_content_length: usize) -> Self {
        Self {
            max_content_length: Some(max_content_length),
            ..self
        }
    }

    /// Returns the configuration with the field selection replaced.
    pub fn with_fields(self, fields: FieldMode) -> Self {
        Self { fields, ..self }
    }

    /// Reports whether robot mode is enabled.
    pub fn is_robot_mode(&self) -> bool {
        self.enabled
    }
}
/// Serializes values and applies size limits according to a [`RobotConfig`].
pub struct RobotFormatter {
    config: RobotConfig,
}

impl RobotFormatter {
    /// Creates a formatter that uses `config` for all subsequent calls.
    pub fn new(config: RobotConfig) -> Self {
        Self { config }
    }

    /// Serializes `value` as JSON in the configured format.
    ///
    /// `Json` and `Table` produce pretty-printed JSON (there is no dedicated
    /// table renderer here); `Jsonl` and `Minimal` produce compact
    /// single-line JSON.
    ///
    /// # Errors
    ///
    /// Returns the `serde_json` error if `value` cannot be serialized.
    pub fn format<T: Serialize>(&self, value: &T) -> Result<String, serde_json::Error> {
        match self.config.format {
            OutputFormat::Json | OutputFormat::Table => serde_json::to_string_pretty(value),
            OutputFormat::Jsonl | OutputFormat::Minimal => serde_json::to_string(value),
        }
    }

    /// Serializes each item as compact JSON and joins the results with `\n`.
    ///
    /// The configured format is not consulted; output is always one compact
    /// JSON document per line, with no trailing newline.
    ///
    /// # Errors
    ///
    /// Returns the first `serde_json` error encountered.
    pub fn format_stream<T: Serialize, I: IntoIterator<Item = T>>(
        &self,
        values: I,
    ) -> Result<String, serde_json::Error> {
        let lines = values
            .into_iter()
            .map(|item| serde_json::to_string(&item))
            .collect::<Result<Vec<String>, _>>()?;
        Ok(lines.join("\n"))
    }

    /// Shortens `content` to the configured maximum length.
    ///
    /// Returns the (possibly shortened) text and whether truncation happened.
    /// Lengths are measured in bytes. When truncating, the text is cut at the
    /// last whitespace inside the limit if there is one, and `"..."` is
    /// appended, so the result may exceed the limit by up to three bytes.
    ///
    /// The cut never lands inside a multi-byte UTF-8 character: if the limit
    /// falls mid-character, the cut backs off to the start of that character
    /// instead of panicking.
    pub fn truncate_content(&self, content: &str) -> (String, bool) {
        let limit = match self.config.max_content_length {
            Some(limit) if content.len() > limit => limit,
            _ => return (content.to_string(), false),
        };

        // Slicing a `str` at a non-boundary byte offset panics, so walk back to
        // the nearest boundary. Offset 0 is always a boundary, which bounds the
        // loop.
        let mut cut = limit;
        while !content.is_char_boundary(cut) {
            cut -= 1;
        }

        let head = &content[..cut];
        // `rfind` yields the byte offset where the whitespace char starts, which
        // is itself a valid boundary.
        let kept = match head.rfind(char::is_whitespace) {
            Some(pos) => &head[..pos],
            None => head,
        };
        (format!("{}...", kept), true)
    }

    /// Rough token estimate: one token per four bytes of `text`, rounded down.
    pub fn estimate_tokens(&self, text: &str) -> usize {
        text.len() / 4
    }

    /// Reports whether the estimated token count of `text` is strictly greater
    /// than the configured budget. Always `false` when no budget is set.
    pub fn would_exceed_budget(&self, text: &str) -> bool {
        match self.config.max_tokens {
            Some(budget) => self.estimate_tokens(text) > budget,
            None => false,
        }
    }

    /// Borrows the configuration this formatter was built with.
    pub fn config(&self) -> &RobotConfig {
        &self.config
    }
}

impl Default for RobotFormatter {
    /// Builds a formatter from [`RobotConfig::default`].
    fn default() -> Self {
        Self::new(RobotConfig::default())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lenient format parsing accepts names and aliases in any case and falls
    /// back to JSON for unknown input.
    #[test]
    fn test_output_format_parsing() {
        let cases = [
            ("json", OutputFormat::Json),
            ("JSONL", OutputFormat::Jsonl),
            ("ndjson", OutputFormat::Jsonl),
            ("minimal", OutputFormat::Minimal),
            ("table", OutputFormat::Table),
            ("unknown", OutputFormat::Json),
        ];
        for (input, expected) in cases {
            assert_eq!(OutputFormat::from_str_loose(input), expected);
        }
    }

    /// Lenient field-mode parsing covers the presets and the `custom:` list.
    #[test]
    fn test_field_mode_parsing() {
        let presets = [
            ("full", FieldMode::Full),
            ("summary", FieldMode::Summary),
            ("minimal", FieldMode::Minimal),
        ];
        for (input, expected) in presets {
            assert_eq!(FieldMode::from_str_loose(input), expected);
        }

        let wanted: Vec<String> = ["title", "url", "score"]
            .iter()
            .map(|name| name.to_string())
            .collect();
        assert_eq!(
            FieldMode::from_str_loose("custom:title,url,score"),
            FieldMode::Custom(wanted)
        );
    }

    /// Over-long content is shortened and flagged; short content is untouched.
    #[test]
    fn test_formatter_truncation() {
        let formatter = RobotFormatter::new(RobotConfig::new().with_max_content_length(20));

        let long_input = "This is a very long string that should be truncated";
        let (shortened, flagged) = formatter.truncate_content(long_input);
        assert!(flagged);
        // 20 bytes of content at most, plus the three-byte "..." suffix.
        assert!(shortened.len() <= 23);

        let (unchanged, flagged) = formatter.truncate_content("Short");
        assert!(!flagged);
        assert_eq!(unchanged, "Short");
    }

    /// The token estimate is the byte length divided by four.
    #[test]
    fn test_formatter_token_estimation() {
        let formatter = RobotFormatter::default();
        let cases = [("12345678", 2), ("", 0)];
        for (text, expected) in cases {
            assert_eq!(formatter.estimate_tokens(text), expected);
        }
    }

    /// JSON output contains both the key and the value of the input object.
    #[test]
    fn test_formatter_json_output() {
        let formatter = RobotFormatter::new(RobotConfig::new());
        let payload = serde_json::json!({"key": "value"});
        let rendered = formatter.format(&payload).unwrap();
        for needle in ["key", "value"] {
            assert!(rendered.contains(needle));
        }
    }
}