use hyperloglog::HyperLogLog;
use indexmap::IndexMap;
use rhai::Dynamic;
use std::collections::{HashMap, HashSet};
use std::fmt;
use std::hash::{Hash, Hasher};
use unicode_width::UnicodeWidthStr;
/// Number of distinct hashes an exact cardinality set may hold; once the set
/// grows past this, the tracker switches to a HyperLogLog estimate.
const EXACT_CARDINALITY_THRESHOLD: usize = 256;
/// Maximum number of example values retained per field.
const MAX_SAMPLES: usize = 8;
/// Table width used when stdout is not a TTY and `COLUMNS` is unset, unparsable or zero.
const REDIRECTED_TABLE_WIDTH: usize = 200;
/// Maximum number of distinct field paths profiled; further new paths are dropped.
const MAX_TRACKED_FIELDS: usize = 1_000;
/// Flatten depth used by `FieldDiscovery::default()` and as the initial global depth.
pub const DEFAULT_FLATTEN_DEPTH: usize = 3;
/// Maximum number of value hashes remembered per field for de-duplicating samples.
const MAX_DEDUP_TRACKING: usize = 1024;
/// Error rate passed to `HyperLogLog::new_deterministic`.
const HLL_ERROR_RATE: f64 = 0.01;
/// Fixed seed passed to `HyperLogLog::new_deterministic`; the hex digits are the
/// ASCII bytes of `field_discovery`.
const HLL_SEED: u128 = 0x6669656c645f646973636f76657279;
/// Coarse type classification of a single observed field value.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum FieldType {
/// Rhai string value.
String,
/// Rhai integer value.
Int,
/// Rhai floating-point value.
Float,
/// Rhai boolean value.
Bool,
/// Rhai unit value (`()`), displayed as `null`.
Null,
/// Rhai array value.
Array,
/// Rhai object map value.
Map,
/// Rhai character value.
Char,
/// Any other value; carries the label shown for it (`datetime`, `duration`,
/// or the value's Rhai type name).
Other(std::string::String),
}
impl FieldType {
    /// Classifies a Rhai [`Dynamic`] value.
    ///
    /// Built-in scalar and container types map to their dedicated variants.
    /// The crate's datetime and duration wrappers become
    /// `Other("datetime")` / `Other("duration")`, and anything else becomes
    /// `Other(<rhai type name>)`.
    pub fn from_dynamic(value: &Dynamic) -> Self {
        if value.is_unit() {
            return FieldType::Null;
        }
        if value.is_string() {
            return FieldType::String;
        }
        if value.is_int() {
            return FieldType::Int;
        }
        if value.is_float() {
            return FieldType::Float;
        }
        if value.is_bool() {
            return FieldType::Bool;
        }
        if value.is_char() {
            return FieldType::Char;
        }
        if value.is_array() {
            return FieldType::Array;
        }
        if value.is_map() {
            return FieldType::Map;
        }

        // Not a built-in type: pick a friendly label for the known wrappers,
        // otherwise fall back to whatever Rhai calls the type.
        let label = if value.is::<crate::rhai_functions::datetime::DateTimeWrapper>() {
            "datetime"
        } else if value.is::<crate::rhai_functions::datetime::DurationWrapper>() {
            "duration"
        } else {
            value.type_name()
        };
        FieldType::Other(label.to_string())
    }
}
impl fmt::Display for FieldType {
    /// Writes the lowercase label for the type (`string`, `int`, ...); for
    /// `Other` the carried label is written verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label: &str = match self {
            FieldType::String => "string",
            FieldType::Int => "int",
            FieldType::Float => "float",
            FieldType::Bool => "bool",
            FieldType::Null => "null",
            FieldType::Array => "array",
            FieldType::Map => "map",
            FieldType::Char => "char",
            FieldType::Other(name) => name.as_str(),
        };
        f.write_str(label)
    }
}
/// Distinct-value counter that starts exact and degrades to an estimate.
enum CardinalityTracker {
/// Every distinct value hash seen so far.
Exact(HashSet<u64>),
/// HyperLogLog sketch used once the exact set has outgrown its threshold.
Estimated(HyperLogLog),
}
impl CardinalityTracker {
    /// Creates an empty tracker in exact mode.
    fn new() -> Self {
        CardinalityTracker::Exact(HashSet::new())
    }

    /// Records one value hash.
    ///
    /// In exact mode the hash is added to the set; as soon as the set holds
    /// more than `EXACT_CARDINALITY_THRESHOLD` entries, all of them are
    /// replayed into a deterministic HyperLogLog and the tracker switches to
    /// estimated mode for good.
    fn insert(&mut self, hash: u64) {
        let hashes = match self {
            CardinalityTracker::Estimated(sketch) => {
                sketch.insert(&hash);
                return;
            }
            CardinalityTracker::Exact(hashes) => hashes,
        };

        hashes.insert(hash);
        if hashes.len() <= EXACT_CARDINALITY_THRESHOLD {
            return;
        }

        let mut sketch = HyperLogLog::new_deterministic(HLL_ERROR_RATE, HLL_SEED);
        for seen in hashes.iter() {
            sketch.insert(seen);
        }
        *self = CardinalityTracker::Estimated(sketch);
    }

    /// Returns `(count, exact)`: the number of distinct values and whether
    /// that number is exact (`true`) or a HyperLogLog estimate (`false`).
    fn cardinality(&self) -> (usize, bool) {
        let exact = matches!(self, CardinalityTracker::Exact(_));
        let count = match self {
            CardinalityTracker::Exact(hashes) => hashes.len(),
            CardinalityTracker::Estimated(sketch) => sketch.len() as usize,
        };
        (count, exact)
    }
}
/// Accumulated statistics for a single field path.
pub struct FieldProfile {
/// Number of values observed for this field (one per `observe` call).
pub seen_count: usize,
/// Number of distinct events in which this field was recorded.
pub events_seen: usize,
// Value of `FieldDiscovery::total_events` at the most recent recording;
// used to count each event only once in `events_seen`.
last_event: usize,
/// How many observed values fell into each `FieldType`.
pub type_counts: HashMap<FieldType, usize>,
// Distinct-value tracker fed with hashes of scalar values only.
cardinality: CardinalityTracker,
/// Retained example values (at most `MAX_SAMPLES` are pushed by this module).
pub samples: Vec<serde_json::Value>,
// Hashes of values already considered for sampling (bounded by `MAX_DEDUP_TRACKING`).
sample_hashes: HashSet<u64>,
// Number of values that passed the duplicate check in `add_sample`.
distinct_samples_seen: usize,
/// Smallest and largest array length observed, if any array was seen.
pub array_size_range: Option<(usize, usize)>,
/// Smallest and largest map size observed, if any map was seen.
pub map_size_range: Option<(usize, usize)>,
}
impl FieldProfile {
/// Creates an empty profile: all counters at zero, no samples, no size
/// ranges, and an exact-mode cardinality tracker.
fn new() -> Self {
    Self {
        // Counters.
        seen_count: 0,
        events_seen: 0,
        last_event: 0,
        distinct_samples_seen: 0,
        // Per-type and per-value bookkeeping.
        type_counts: HashMap::new(),
        cardinality: CardinalityTracker::new(),
        sample_hashes: HashSet::new(),
        samples: Vec::new(),
        // Container size ranges stay unset until a container is observed.
        array_size_range: None,
        map_size_range: None,
    }
}
fn observe(&mut self, value: &Dynamic) {
self.seen_count += 1;
let ft = FieldType::from_dynamic(value);
*self.type_counts.entry(ft.clone()).or_insert(0) += 1;
match ft {
FieldType::Null => {
}
FieldType::Array => {
if let Ok(arr) = value.clone().into_array() {
let len = arr.len();
self.array_size_range = Some(match self.array_size_range {
Some((lo, hi)) => (lo.min(len), hi.max(len)),
None => (len, len),
});
}
}
FieldType::Map => {
if let Some(map) = value.clone().try_cast::<rhai::Map>() {
let len = map.len();
self.map_size_range = Some(match self.map_size_range {
Some((lo, hi)) => (lo.min(len), hi.max(len)),
None => (len, len),
});
}
}
_ => {
let display = scalar_display(value);
let sample_value = scalar_to_json(value);
let hash = hash_value(&ft, &display);
self.cardinality.insert(hash);
self.add_sample(hash, sample_value);
}
}
}
/// Offers a value as an example for this field.
///
/// Values whose hash is already remembered are ignored. New hashes are
/// remembered only while fewer than `MAX_DEDUP_TRACKING` are stored. The
/// first `MAX_SAMPLES` accepted values are kept; after that each accepted
/// value replaces a random slot with probability
/// `MAX_SAMPLES / distinct_samples_seen`.
fn add_sample(&mut self, hash: u64, sample: serde_json::Value) {
    let duplicate = if self.sample_hashes.len() < MAX_DEDUP_TRACKING {
        // `insert` returns false when the hash was already present.
        !self.sample_hashes.insert(hash)
    } else {
        self.sample_hashes.contains(&hash)
    };
    if duplicate {
        return;
    }

    self.distinct_samples_seen += 1;
    if self.samples.len() < MAX_SAMPLES {
        self.samples.push(sample);
        return;
    }

    let slot = fastrand::usize(0..self.distinct_samples_seen);
    if slot < MAX_SAMPLES {
        self.samples[slot] = sample;
    }
}
/// Returns `(count, exact)` for the distinct scalar values of this field.
///
/// Exact counts are passed through. Estimates are capped at the number of
/// scalar observations, since there cannot be more distinct values than
/// values seen.
pub fn cardinality(&self) -> (usize, bool) {
    match self.cardinality.cardinality() {
        (count, true) => (count, true),
        (estimate, false) => (estimate.min(self.scalar_observation_count()), false),
    }
}
/// Total number of observations whose type is neither null, array nor map.
fn scalar_observation_count(&self) -> usize {
    let mut total = 0;
    for (ft, count) in &self.type_counts {
        match ft {
            FieldType::Null | FieldType::Array | FieldType::Map => {}
            _ => total += *count,
        }
    }
    total
}
/// Returns the observed types with their counts, most frequent first.
///
/// Types with equal counts are ordered by `FieldType`'s derived `Ord`, so
/// the result is the same on every call and every run.
pub fn types_by_frequency(&self) -> Vec<(FieldType, usize)> {
    let mut types: Vec<_> = self
        .type_counts
        .iter()
        .map(|(ft, &c)| (ft.clone(), c))
        .collect();
    // `type_counts` is a HashMap, whose iteration order is arbitrary; without
    // a tie-break, equally frequent types would come out in a different
    // order from run to run. Keys are unique, so the ordering is total and
    // an unstable sort is sufficient.
    types.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
    types
}
}
/// Describes which input format(s) were used, for the table footer and the
/// JSON report.
#[derive(Debug, Clone)]
pub struct FormatSummary {
    /// Name of the format; used in the footer when `counts` is empty.
    pub format: std::string::String,
    /// How the format was chosen; `"auto"`, `"per-file"` and `"cascade"` get a
    /// footer tag, any other value gets none.
    pub detection: &'static str,
    /// Per-format tallies as `(format name, count)`.
    pub counts: Vec<(std::string::String, usize)>,
    /// What the tallies in `counts` count; printed in parentheses after a
    /// multi-format listing.
    pub unit: &'static str,
}

impl FormatSummary {
    /// Renders the footer text for this summary.
    ///
    /// With two or more tallies: `formats: a 1, b 2 (unit)`. Otherwise
    /// `format: <name><tag>`, where the name is the single tally's name or,
    /// lacking one, `self.format`.
    fn footer_fragment(&self) -> std::string::String {
        if self.counts.len() <= 1 {
            let name = match self.counts.first() {
                Some((only, _)) => only.as_str(),
                None => self.format.as_str(),
            };
            let tag = match self.detection {
                "auto" => " (auto-detected)",
                "per-file" => " (per-file)",
                "cascade" => " (cascade)",
                _ => "",
            };
            return format!("format: {}{}", name, tag);
        }

        let mut listing = std::string::String::new();
        for (idx, (name, count)) in self.counts.iter().enumerate() {
            if idx > 0 {
                listing.push_str(", ");
            }
            listing.push_str(&format!("{} {}", name, count));
        }
        format!("formats: {} ({})", listing, self.unit)
    }
}
/// Describes the timestamp field, for the table footer and the JSON report.
#[derive(Debug, Clone)]
pub struct TimestampSummary {
    /// Name of the timestamp field.
    pub field: std::string::String,
    /// Whether the field was chosen via `--ts-field`.
    pub overridden: bool,
    /// Count reported as `detected`; zero is shown as "not found".
    pub detected: usize,
    /// Count reported as `parsed`; shown as a percentage of `detected` when lower.
    pub parsed: usize,
}

impl TimestampSummary {
    /// Renders `timestamp: <field>` followed, when applicable, by a
    /// parenthesised, comma-separated list of notes: `--ts-field` if
    /// overridden, then either `not found` or `<pct>% parsed`.
    fn footer_fragment(&self) -> std::string::String {
        let override_note = self.overridden.then(|| "--ts-field".to_string());
        let coverage_note = if self.detected == 0 {
            Some("not found".to_string())
        } else if self.parsed < self.detected {
            let pct = (self.parsed as f64 / self.detected as f64) * 100.0;
            Some(format!("{:.0}% parsed", pct))
        } else {
            None
        };

        let notes: Vec<std::string::String> =
            override_note.into_iter().chain(coverage_note).collect();
        match notes.as_slice() {
            [] => format!("timestamp: {}", self.field),
            some => format!("timestamp: {} ({})", self.field, some.join(", ")),
        }
    }
}
/// Collects per-field profiles across all observed events and renders them
/// as a table or as JSON.
pub struct FieldDiscovery {
/// Profiles keyed by field path, in first-seen order. Nested map keys are
/// joined with `.` and array elements are recorded under `<path>[]`.
pub fields: IndexMap<std::string::String, FieldProfile>,
/// Number of events passed to `observe_event`.
pub total_events: usize,
// Set once a new field had to be dropped because `MAX_TRACKED_FIELDS` was reached.
capped: bool,
// Set once a map or array was left unflattened because of `flatten_depth`.
flatten_depth_capped: bool,
// Maximum nesting depth to descend into; 0 disables the limit.
flatten_depth: usize,
/// Optional input-format description appended to the footer / JSON.
pub format_summary: Option<FormatSummary>,
/// Optional timestamp-field description appended to the footer / JSON.
pub timestamp_summary: Option<TimestampSummary>,
/// When true, the table ends with a tip about `--discover-final`.
pub suggest_discover_final: bool,
}
impl Default for FieldDiscovery {
fn default() -> Self {
Self::with_depth(DEFAULT_FLATTEN_DEPTH)
}
}
impl FieldDiscovery {
/// Test-only shorthand for an empty discovery with the default flatten depth.
#[cfg(test)]
pub fn new() -> Self {
    Self::default()
}
/// Creates an empty discovery that descends at most `flatten_depth` levels
/// into nested maps and arrays (0 means no limit).
pub fn with_depth(flatten_depth: usize) -> Self {
    Self {
        flatten_depth,
        // Nothing observed yet.
        total_events: 0,
        fields: IndexMap::new(),
        // No limit has been hit yet.
        capped: false,
        flatten_depth_capped: false,
        // Footer extras are filled in by the caller when available.
        format_summary: None,
        timestamp_summary: None,
        suggest_discover_final: false,
    }
}
/// Profiles one event: bumps the event counter, then records every
/// top-level field (and its nested children) starting at depth 1.
pub fn observe_event<S: std::hash::BuildHasher>(
    &mut self,
    fields: &IndexMap<std::string::String, Dynamic, S>,
) {
    self.total_events += 1;
    fields
        .iter()
        .for_each(|(name, value)| self.observe_path(name, value, 1));
}
/// Records `value` under `path` and, while the depth limit allows,
/// recurses into its children.
///
/// Map entries are recorded under `<path>.<key>` and every array element
/// under the shared path `<path>[]`. When the limit stops the descent at a
/// map or array, `flatten_depth_capped` is set so the report can say so.
/// A `flatten_depth` of 0 disables the limit.
fn observe_path(&mut self, path: &str, value: &Dynamic, depth: usize) {
    self.record(path, value);
    if self.flatten_depth != 0 && depth >= self.flatten_depth {
        if value.is_map() || value.is_array() {
            self.flatten_depth_capped = true;
        }
        return;
    }
    // Containers are borrowed through `read_lock` rather than cloned: the
    // children are only read, and cloning would deep-copy the whole subtree
    // again at every nesting level.
    if value.is_map() {
        if let Some(map) = value.read_lock::<rhai::Map>() {
            for (k, v) in map.iter() {
                let subkey = format!("{path}.{k}");
                self.observe_path(&subkey, v, depth + 1);
            }
        }
    } else if value.is_array() {
        if let Some(arr) = value.read_lock::<rhai::Array>() {
            let subkey = format!("{path}[]");
            for elem in arr.iter() {
                self.observe_path(&subkey, elem, depth + 1);
            }
        }
    }
}
/// Adds one observation of `value` to the profile stored under `path`.
///
/// A known path is updated in place, and its `events_seen` grows only the
/// first time it is touched within the current event. An unknown path gets
/// a fresh profile unless `MAX_TRACKED_FIELDS` profiles already exist; in
/// that case the value is dropped and a warning is printed to stderr once.
fn record(&mut self, path: &str, value: &Dynamic) {
    let current_event = self.total_events;

    if let Some(existing) = self.fields.get_mut(path) {
        existing.observe(value);
        if existing.last_event != current_event {
            existing.last_event = current_event;
            existing.events_seen += 1;
        }
        return;
    }

    if self.fields.len() < MAX_TRACKED_FIELDS {
        let mut fresh = FieldProfile::new();
        fresh.observe(value);
        fresh.last_event = current_event;
        fresh.events_seen = 1;
        self.fields.insert(path.to_string(), fresh);
        return;
    }

    // Field limit reached: warn only the first time.
    if !self.capped {
        self.capped = true;
        eprintln!(
            "Warning: field discovery truncated at {} unique field names",
            MAX_TRACKED_FIELDS
        );
    }
}
/// Renders the report as a text table sized for the current output.
///
/// On a TTY the terminal width is used. Otherwise the width comes from a
/// positive integer in the `COLUMNS` environment variable, falling back to
/// `REDIRECTED_TABLE_WIDTH`.
pub fn format_table(&self, use_unicode: bool) -> std::string::String {
    if crate::tty::is_stdout_tty() {
        return self.format_table_for_width(crate::tty::get_terminal_width(), use_unicode);
    }

    let columns = match std::env::var("COLUMNS") {
        Ok(raw) => raw.parse::<usize>().ok(),
        Err(_) => None,
    };
    let width = match columns {
        Some(cols) if cols > 0 => cols,
        _ => REDIRECTED_TABLE_WIDTH,
    };
    self.format_table_for_width(width, use_unicode)
}
/// Renders the report for a given width.
///
/// Fields are listed by descending `events_seen`. Three layouts are tried in
/// order: a full table with an Examples column, a compact table with the
/// examples on a separate line, and a stacked per-field block. Notes about
/// hit limits and a footer (event count, format, timestamp, optional tip)
/// follow the rows. `use_unicode` selects the ellipsis/dash glyphs and the
/// footer separator and arrow.
fn format_table_for_width(
&self,
terminal_width: usize,
use_unicode: bool,
) -> std::string::String {
let glyphs = Glyphs::new(use_unicode);
// Nothing observed: a single message, no table and no footer.
if self.fields.is_empty() {
return format!("Scanned {} events: no fields found", self.total_events);
}
let mut output = std::string::String::new();
// Widths below 36 are treated as 36.
let terminal_width = terminal_width.max(36);
let mut entries: Vec<_> = self.fields.iter().collect();
// `sort_by` is stable, so equally frequent fields keep the order of `self.fields`.
entries.sort_by(|a, b| b.1.events_seen.cmp(&a.1.events_seen));
let rows: Vec<_> = entries
.iter()
.map(|(name, profile)| {
DiscoveryRow::from_profile(self.total_events, name, profile, &glyphs)
})
.collect();
// Layout 1: one line per field, including an Examples column.
if let Some(widths) = TableWidths::for_full_table(terminal_width, &rows) {
output.push_str(&pad_right_display("Field", widths.name));
output.push_str(" ");
output.push_str(&pad_right_display("Type", widths.types));
output.push_str(" ");
output.push_str(&pad_left_display("Seen", widths.seen));
output.push_str(" ");
output.push_str(&pad_left_display("Miss", widths.miss));
output.push_str(" ");
output.push_str(&pad_left_display("Uniq", widths.unique));
output.push_str(" Examples\n");
for row in &rows {
output.push_str(&pad_right_display(
&truncate_for_display(&row.name, widths.name, glyphs.ellipsis),
widths.name,
));
output.push_str(" ");
output.push_str(&pad_right_display(
&truncate_for_display(&row.types, widths.types, glyphs.ellipsis),
widths.types,
));
output.push_str(" ");
output.push_str(&pad_left_display(&row.seen_count.to_string(), widths.seen));
output.push_str(" ");
output.push_str(&pad_left_display(
&format!("{:.0}%", row.miss_pct),
widths.miss,
));
output.push_str(" ");
output.push_str(&pad_left_display(&row.unique, widths.unique));
output.push_str(" ");
output.push_str(&truncate_for_display(
&row.examples,
widths.examples,
glyphs.ellipsis,
));
output.push('\n');
}
// Layout 2: same columns without Examples; examples go on the following line.
} else if let Some(widths) = TableWidths::for_compact_table(terminal_width, &rows) {
output.push_str(&pad_right_display("Field", widths.name));
output.push_str(" ");
output.push_str(&pad_right_display("Type", widths.types));
output.push_str(" ");
output.push_str(&pad_left_display("Seen", widths.seen));
output.push_str(" ");
output.push_str(&pad_left_display("Miss", widths.miss));
output.push_str(" ");
output.push_str(&pad_left_display("Uniq", widths.unique));
output.push('\n');
for row in &rows {
output.push_str(&pad_right_display(
&truncate_for_display(&row.name, widths.name, glyphs.ellipsis),
widths.name,
));
output.push_str(" ");
output.push_str(&pad_right_display(
&truncate_for_display(&row.types, widths.types, glyphs.ellipsis),
widths.types,
));
output.push_str(" ");
output.push_str(&pad_left_display(&row.seen_count.to_string(), widths.seen));
output.push_str(" ");
output.push_str(&pad_left_display(
&format!("{:.0}%", row.miss_pct),
widths.miss,
));
output.push_str(" ");
output.push_str(&pad_left_display(&row.unique, widths.unique));
output.push('\n');
if !row.examples.is_empty() {
output.push_str(" ");
output.push_str(&truncate_for_display(
&row.examples,
terminal_width.saturating_sub(2),
glyphs.ellipsis,
));
output.push('\n');
}
}
// Layout 3: neither table fits; print one stacked block per field,
// separated by blank lines.
} else {
for (idx, row) in rows.iter().enumerate() {
if idx > 0 {
output.push('\n');
}
output.push_str(&truncate_for_display(
&row.name,
terminal_width,
glyphs.ellipsis,
));
output.push('\n');
output.push_str(&format!(
" seen: {} miss: {:.0}%\n",
row.seen_count, row.miss_pct
));
output.push_str(" type: ");
output.push_str(&truncate_for_display(
&row.types,
terminal_width.saturating_sub(8),
glyphs.ellipsis,
));
output.push('\n');
output.push_str(" unique: ");
output.push_str(&truncate_for_display(
&row.unique,
terminal_width.saturating_sub(10),
glyphs.ellipsis,
));
output.push('\n');
if !row.examples.is_empty() {
output.push_str(" examples: ");
output.push_str(&truncate_for_display(
&row.examples,
terminal_width.saturating_sub(16),
glyphs.ellipsis,
));
output.push('\n');
}
}
}
// Notes about limits that were hit while collecting.
if self.capped {
output.push_str(&format!(
"\n(Field tracking capped at {} unique field names)\n",
MAX_TRACKED_FIELDS
));
}
if self.flatten_depth_capped {
output.push_str(&format!(
"\nNote: Nested field flattening stopped at depth {}; deeper children are not shown. Use --discover-depth=N to descend further (0 = unlimited).\n",
self.flatten_depth
));
}
// Footer: event count, then optional format and timestamp fragments.
output.push_str(&format!("\n{} events scanned", self.total_events));
let sep = if use_unicode { " · " } else { " | " };
if let Some(summary) = &self.format_summary {
output.push_str(sep);
output.push_str(&summary.footer_fragment());
}
if let Some(ts) = &self.timestamp_summary {
output.push_str(sep);
output.push_str(&ts.footer_fragment());
// Point at `meta.parsed_ts` only when `parsed` is non-zero.
if ts.parsed > 0 {
let arrow = if use_unicode { " → " } else { " -> " };
output.push_str(arrow);
output.push_str("meta.parsed_ts");
}
}
if self.suggest_discover_final {
output.push_str(
"\n\nTip: these are parsed input fields, before filters and transforms. \
Use --discover-final to profile the fields your pipeline actually emits.",
);
}
output
}
/// Renders the report as pretty-printed JSON.
///
/// The document holds `total_events`, one object per field (in the order of
/// `self.fields`), and the truncation / flatten-depth flags. Format and
/// timestamp details are added when the corresponding summary is set.
/// Falls back to `"{}"` if serialization fails.
pub fn format_json(&self) -> std::string::String {
    let timestamp_field = self
        .timestamp_summary
        .as_ref()
        .map(|ts| ts.field.as_str());

    let fields_json: Vec<serde_json::Value> = self
        .fields
        .iter()
        .map(|(name, profile)| {
            let types: Vec<serde_json::Value> = profile
                .types_by_frequency()
                .into_iter()
                .map(|(ft, count)| {
                    serde_json::json!({
                        "type": ft.to_string(),
                        "count": count,
                    })
                })
                .collect();
            let (card_count, card_exact) = profile.cardinality();

            let mut entry = serde_json::json!({
                "name": name,
                "seen": profile.events_seen,
                "observations": profile.seen_count,
                "missing": self.total_events.saturating_sub(profile.events_seen),
                "types": types,
                "cardinality": {
                    "count": card_count,
                    "exact": card_exact,
                },
                "samples": profile.samples,
            });

            // Optional per-field extras.
            if timestamp_field == Some(name.as_str()) {
                entry["timestamp"] = serde_json::json!(true);
            }
            if let Some((lo, hi)) = profile.array_size_range {
                entry["array_size"] = serde_json::json!({"min": lo, "max": hi});
            }
            if let Some((lo, hi)) = profile.map_size_range {
                entry["map_size"] = serde_json::json!({"min": lo, "max": hi});
            }
            entry
        })
        .collect();

    let mut report = serde_json::json!({
        "total_events": self.total_events,
        "fields": fields_json,
        "truncated": self.capped,
        "flatten_depth_limit": self.flatten_depth,
        "flatten_depth_capped": self.flatten_depth_capped,
    });

    if let Some(summary) = &self.format_summary {
        report["format"] = serde_json::json!(summary.format);
        report["format_detection"] = serde_json::json!(summary.detection);
        if !summary.counts.is_empty() {
            let mut tallies = serde_json::Map::new();
            for (format_name, count) in &summary.counts {
                tallies.insert(format_name.clone(), serde_json::json!(count));
            }
            report["format_counts"] = serde_json::Value::Object(tallies);
            report["format_count_unit"] = serde_json::json!(summary.unit);
        }
    }

    if let Some(ts) = &self.timestamp_summary {
        let source = if ts.overridden { "ts-field" } else { "auto" };
        report["timestamp"] = serde_json::json!({
            "field": ts.field,
            "source": source,
            "detected": ts.detected,
            "parsed": ts.parsed,
        });
    }

    match serde_json::to_string_pretty(&report) {
        Ok(text) => text,
        Err(_) => "{}".to_string(),
    }
}
}
/// Decorative strings used by the table renderer, in a Unicode and an ASCII flavour.
struct Glyphs {
    /// Marker appended where text was cut or a list continues.
    ellipsis: &'static str,
    /// Placeholder shown when a cell has no value.
    em_dash: &'static str,
}

impl Glyphs {
    /// Picks the Unicode glyphs (`…`, `—`) or their ASCII stand-ins (`...`, `-`).
    fn new(use_unicode: bool) -> Self {
        let (ellipsis, em_dash) = if use_unicode {
            ("\u{2026}", "\u{2014}")
        } else {
            ("...", "-")
        };
        Self { ellipsis, em_dash }
    }
}
/// Builds the "Type" cell: the observed types, most frequent first, joined
/// with `", "`.
///
/// Arrays and maps carry their observed size as `array(n)` or
/// `array(lo..hi)` (likewise for `map`). A profile without any type yields `-`.
fn format_types(profile: &FieldProfile) -> std::string::String {
    let types = profile.types_by_frequency();
    if types.is_empty() {
        return "-".to_string();
    }

    // Shared rendering for the two container kinds.
    let sized = |label: &str, range: Option<(usize, usize)>| -> std::string::String {
        match range {
            Some((lo, hi)) if lo == hi => format!("{}({})", label, lo),
            Some((lo, hi)) => format!("{}({}..{})", label, lo, hi),
            None => label.to_string(),
        }
    };

    let mut parts: Vec<std::string::String> = Vec::with_capacity(types.len());
    for (ft, _) in &types {
        let rendered = match ft {
            FieldType::Array => sized("array", profile.array_size_range),
            FieldType::Map => sized("map", profile.map_size_range),
            other => other.to_string(),
        };
        parts.push(rendered);
    }
    parts.join(", ")
}
/// Builds the "Uniq" cell.
///
/// Returns `em_dash` when the field never held a scalar value or the count
/// is zero; otherwise the exact count, or the estimate prefixed with `~`.
fn format_cardinality(profile: &FieldProfile, em_dash: &str) -> std::string::String {
    let (count, exact) = profile.cardinality();
    let no_scalars = profile
        .type_counts
        .keys()
        .all(|ft| matches!(ft, FieldType::Map | FieldType::Array | FieldType::Null));
    if no_scalars || count == 0 {
        return em_dash.to_string();
    }

    let prefix = if exact { "" } else { "~" };
    format!("{}{}", prefix, count)
}
/// Builds the "Examples" cell: the retained samples joined with `", "`.
///
/// `ellipsis` is appended as a final item unless the cardinality is exact
/// and no larger than the number of samples, i.e. unless the list is known
/// to be complete. No samples yields an empty string.
fn format_examples(profile: &FieldProfile, ellipsis: &str) -> std::string::String {
    if profile.samples.is_empty() {
        return std::string::String::new();
    }

    let mut items: Vec<std::string::String> =
        profile.samples.iter().map(sample_json_display).collect();

    let (count, exact) = profile.cardinality();
    let complete = exact && count <= profile.samples.len();
    if !complete {
        items.push(ellipsis.to_string());
    }
    items.join(", ")
}
/// Converts a scalar value to the plain text used for hashing.
///
/// Strings, bools, ints, floats and chars are rendered from their native
/// value; anything else (or a failed conversion) falls back to the value's
/// own `to_string()`.
fn scalar_display(value: &Dynamic) -> std::string::String {
    let native = if value.is_string() {
        value.clone().into_string().ok()
    } else if value.is_bool() {
        value.as_bool().ok().map(|b| b.to_string())
    } else if value.is_int() {
        value.as_int().ok().map(|i| i.to_string())
    } else if value.is_float() {
        value.as_float().ok().map(|f| f.to_string())
    } else if value.is_char() {
        value.as_char().ok().map(|c| c.to_string())
    } else {
        None
    };
    native.unwrap_or_else(|| value.to_string())
}
/// Converts a scalar value to the JSON value stored as a sample.
///
/// Strings and chars become JSON strings, ints and floats become numbers,
/// bools stay bools and unit becomes `null`. A float that JSON cannot
/// represent, or any failed conversion, also becomes `null`. Values of any
/// other type are stored as their `to_string()` text.
fn scalar_to_json(value: &Dynamic) -> serde_json::Value {
    let converted: Option<serde_json::Value> = if value.is_string() {
        value
            .clone()
            .into_string()
            .ok()
            .map(serde_json::Value::String)
    } else if value.is_int() {
        value
            .as_int()
            .ok()
            .map(|i| serde_json::Value::Number(serde_json::Number::from(i)))
    } else if value.is_float() {
        value
            .as_float()
            .ok()
            .and_then(serde_json::Number::from_f64)
            .map(serde_json::Value::Number)
    } else if value.is_bool() {
        value.as_bool().ok().map(serde_json::Value::Bool)
    } else if value.is_char() {
        value
            .as_char()
            .ok()
            .map(|c| serde_json::Value::String(c.to_string()))
    } else if value.is_unit() {
        None
    } else {
        Some(serde_json::Value::String(value.to_string()))
    };
    converted.unwrap_or(serde_json::Value::Null)
}
/// Renders one stored sample for the Examples cell: strings are escaped for
/// display and wrapped in double quotes, `null` prints as `null`, and every
/// other value uses its JSON text.
fn sample_json_display(value: &serde_json::Value) -> std::string::String {
    if let serde_json::Value::String(text) = value {
        return format!("\"{}\"", crate::formatters::escape_for_display(text));
    }
    if let serde_json::Value::Null = value {
        return "null".to_string();
    }
    value.to_string()
}
/// Hashes a value's type together with its display text, so that equal text
/// of different types (for example int `42` and string `"42"`) yields
/// different hashes.
fn hash_value(ft: &FieldType, display: &str) -> u64 {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    Hash::hash(ft, &mut state);
    Hash::hash(display, &mut state);
    Hasher::finish(&state)
}
/// Shortens `s` to at most `max_chars` characters, ending in `ellipsis`
/// when text had to be cut.
///
/// Lengths are counted in `char`s.
/// * Text that already fits is returned unchanged.
/// * If the text must be cut but the budget is not larger than the ellipsis
///   itself, `max_chars` dots are returned.
/// * Otherwise the first `max_chars - ellipsis length` characters are kept
///   and the ellipsis is appended.
fn truncate_for_display(s: &str, max_chars: usize, ellipsis: &str) -> std::string::String {
    // Check the fit first: text that needs no cutting must never be replaced
    // by dots merely because the budget is as small as the ellipsis.
    if s.chars().count() <= max_chars {
        return s.to_string();
    }
    let ell_width = ellipsis.chars().count();
    if max_chars <= ell_width {
        return ".".repeat(max_chars);
    }
    let keep = max_chars - ell_width;
    let mut out: std::string::String = s.chars().take(keep).collect();
    out.push_str(ellipsis);
    out
}
fn display_width(s: &str) -> usize {
UnicodeWidthStr::width(s)
}
/// Left-aligns `s` in `width` display columns by appending spaces; text that
/// is already at least that wide is returned unchanged.
fn pad_right_display(s: &str, width: usize) -> std::string::String {
    let padding = width.saturating_sub(display_width(s));
    let mut out = std::string::String::with_capacity(s.len() + padding);
    out.push_str(s);
    out.extend(std::iter::repeat(' ').take(padding));
    out
}
/// Right-aligns `s` in `width` display columns by prepending spaces; text
/// that is already at least that wide is returned unchanged.
fn pad_left_display(s: &str, width: usize) -> std::string::String {
    let padding = width.saturating_sub(display_width(s));
    let mut out = std::string::String::with_capacity(s.len() + padding);
    out.extend(std::iter::repeat(' ').take(padding));
    out.push_str(s);
    out
}
/// Column widths chosen for one table layout.
struct TableWidths {
// Width of the "Field" column.
name: usize,
// Width of the "Seen" column.
seen: usize,
// Width of the "Miss" column.
miss: usize,
// Width of the "Type" column.
types: usize,
// Width of the "Uniq" column.
unique: usize,
// Width of the "Examples" column; 0 in the compact layout, which has none.
examples: usize,
}
impl TableWidths {
    /// Computes widths for the layout with an Examples column, or `None`
    /// when `terminal_width` cannot hold even the minimum column widths.
    ///
    /// Seen/Miss/Uniq get their natural width. The remaining space is handed
    /// out in a fixed order: Type up to 18, Field up to 26, Type up to its
    /// cap (30), Field up to its cap (40), and finally Examples.
    fn for_full_table(terminal_width: usize, rows: &[DiscoveryRow]) -> Option<Self> {
        const MISS: usize = 4;
        const SEPARATORS: usize = 10;
        const MIN_NAME: usize = 12;
        const MIN_TYPES: usize = 6;
        const MIN_EXAMPLES: usize = 8;

        // Fixed-width numeric columns.
        let seen = row_width(rows, |r| r.seen_count.to_string().len(), "Seen");
        let unique = row_width(rows, |r| display_width(&r.unique), "Uniq");

        // Widest content per flexible column, capped for Field and Type.
        let name_cap = row_width(rows, |r| display_width(&r.name), "Field").min(40);
        let types_cap = row_width(rows, |r| display_width(&r.types), "Type").min(30);
        let examples_cap = row_width(rows, |r| display_width(&r.examples), "Examples");

        let flexible = terminal_width.checked_sub(seen + MISS + unique + SEPARATORS)?;

        // Start every flexible column at its floor.
        let mut name = MIN_NAME.min(name_cap);
        let mut types = MIN_TYPES.min(types_cap);
        let mut examples = MIN_EXAMPLES.min(examples_cap.max(MIN_EXAMPLES));
        let floors = name + types + examples;
        if flexible < floors {
            return None;
        }

        // Distribute what is left; the order of these calls sets the priority.
        let mut spare = flexible.saturating_sub(floors);
        grow_width(&mut types, types_cap.min(18), &mut spare);
        grow_width(&mut name, name_cap.min(26), &mut spare);
        grow_width(&mut types, types_cap, &mut spare);
        grow_width(&mut name, name_cap, &mut spare);
        grow_width(&mut examples, examples_cap, &mut spare);

        Some(Self {
            name,
            seen,
            miss: MISS,
            types,
            unique,
            examples,
        })
    }

    /// Computes widths for the layout without an Examples column, or `None`
    /// when Field and Type cannot both get their minimum width.
    ///
    /// When both cannot have their full width, Type is sized first (leaving
    /// Field its floor) and Field takes what remains.
    fn for_compact_table(terminal_width: usize, rows: &[DiscoveryRow]) -> Option<Self> {
        const MISS: usize = 4;
        const SEPARATORS: usize = 8;
        const MIN_NAME: usize = 12;
        const MIN_TYPES: usize = 6;

        let seen = row_width(rows, |r| r.seen_count.to_string().len(), "Seen");
        let unique = row_width(rows, |r| display_width(&r.unique), "Uniq");
        let name_cap = row_width(rows, |r| display_width(&r.name), "Field").min(40);
        let types_cap = row_width(rows, |r| display_width(&r.types), "Type").min(30);

        let budget = terminal_width.checked_sub(seen + MISS + unique + SEPARATORS)?;
        let name_floor = MIN_NAME.min(name_cap);
        let types_floor = MIN_TYPES.min(types_cap);
        if budget < name_floor + types_floor {
            return None;
        }

        let (name, types) = if name_cap + types_cap <= budget {
            (name_cap, types_cap)
        } else {
            let types_width = types_cap.min(budget.saturating_sub(name_floor));
            let name_width = budget.saturating_sub(types_width).min(name_cap);
            (name_width, types_width)
        };
        if name < name_floor || types < types_floor {
            return None;
        }

        Some(Self {
            name,
            seen,
            miss: MISS,
            types,
            unique,
            examples: 0,
        })
    }
}
/// Returns the width a column needs: the largest value of `f` over all
/// rows, but never less than the display width of `header`.
fn row_width<F>(rows: &[DiscoveryRow], f: F, header: &str) -> usize
where
    F: Fn(&DiscoveryRow) -> usize,
{
    rows.iter().map(f).fold(display_width(header), usize::max)
}
/// Moves `current` towards `target`, spending from `remaining`.
///
/// Grows by as much as is still missing or as much as is left, whichever is
/// smaller; nothing changes when `current` is already at or above `target`
/// or nothing remains.
fn grow_width(current: &mut usize, target: usize, remaining: &mut usize) {
    // `saturating_sub` yields 0 when the target is already met.
    let step = target.saturating_sub(*current).min(*remaining);
    *current += step;
    *remaining -= step;
}
/// Pre-rendered cell contents for one field of the table.
struct DiscoveryRow {
// Field path.
name: std::string::String,
// Number of events in which the field was seen.
seen_count: usize,
// Percentage of events in which the field was absent.
miss_pct: f64,
// Rendered type list (see `format_types`).
types: std::string::String,
// Rendered distinct-value count (see `format_cardinality`).
unique: std::string::String,
// Rendered example values (see `format_examples`).
examples: std::string::String,
}
impl DiscoveryRow {
    /// Renders the table cells for one field.
    ///
    /// `miss_pct` is the share of `total_events` in which the field did not
    /// appear, as a percentage; it is 0 when no events were scanned.
    fn from_profile(
        total_events: usize,
        name: &str,
        profile: &FieldProfile,
        glyphs: &Glyphs,
    ) -> Self {
        let miss_pct = match total_events {
            0 => 0.0,
            total => {
                let missing = total.saturating_sub(profile.events_seen);
                (missing as f64 / total as f64) * 100.0
            }
        };

        let types = format_types(profile);
        let unique = format_cardinality(profile, glyphs.em_dash);
        let examples = format_examples(profile, glyphs.ellipsis);

        Self {
            name: name.to_string(),
            seen_count: profile.events_seen,
            miss_pct,
            types,
            unique,
            examples,
        }
    }
}
use std::cell::RefCell;
use std::sync::atomic::{AtomicBool, Ordering};
// Process-wide switch: field discovery collects nothing until this is set.
static ENABLED: AtomicBool = AtomicBool::new(false);
// Process-wide flag stored by `enable` and reported by `is_discover_final`.
static DISCOVER_FINAL: AtomicBool = AtomicBool::new(false);
// Flatten depth handed to every newly created per-thread collector.
static FLATTEN_DEPTH: std::sync::atomic::AtomicUsize =
std::sync::atomic::AtomicUsize::new(DEFAULT_FLATTEN_DEPTH);
thread_local! {
// Per-thread collector; its initializer reads FLATTEN_DEPTH.
static THREAD_DISCOVERY: RefCell<FieldDiscovery> = RefCell::new(FieldDiscovery::with_depth(
FLATTEN_DEPTH.load(Ordering::Relaxed),
));
}
/// Turns field discovery on for the whole process and stores the
/// `discover_final` flag and the flatten depth for later readers.
///
/// The depth is stored before the enabled flag; all stores use relaxed ordering.
pub fn enable(discover_final: bool, flatten_depth: usize) {
FLATTEN_DEPTH.store(flatten_depth, Ordering::Relaxed);
ENABLED.store(true, Ordering::Relaxed);
DISCOVER_FINAL.store(discover_final, Ordering::Relaxed);
}
/// Returns whether `enable` has been called.
pub fn is_enabled() -> bool {
ENABLED.load(Ordering::Relaxed)
}
/// Returns the `discover_final` flag last passed to `enable` (false before that).
pub fn is_discover_final() -> bool {
DISCOVER_FINAL.load(Ordering::Relaxed)
}
/// Feeds one event's fields into the calling thread's collector; does
/// nothing while discovery is disabled.
pub fn observe_event_fields<S: std::hash::BuildHasher>(fields: &IndexMap<String, Dynamic, S>) {
    if is_enabled() {
        THREAD_DISCOVERY.with(|cell| cell.borrow_mut().observe_event(fields));
    }
}
/// Hands out the calling thread's collector and leaves a fresh, empty one
/// (using the current global flatten depth) in its place.
pub fn take_thread_discovery() -> FieldDiscovery {
    let depth = FLATTEN_DEPTH.load(Ordering::Relaxed);
    THREAD_DISCOVERY.with(|cell| cell.replace(FieldDiscovery::with_depth(depth)))
}
#[cfg(test)]
mod tests {
use super::*;
// Test helper: wraps an owned copy of `s` in a `Dynamic`.
fn make_string(s: &str) -> Dynamic {
Dynamic::from(s.to_string())
}
// Test helper: wraps an `i64` in a `Dynamic`.
fn make_int(i: i64) -> Dynamic {
Dynamic::from(i)
}
// Test helper: wraps an `f64` in a `Dynamic`.
fn make_float(f: f64) -> Dynamic {
Dynamic::from(f)
}
// Test helper: wraps a `bool` in a `Dynamic`.
fn make_bool(b: bool) -> Dynamic {
Dynamic::from(b)
}
// Test helper: the unit value, which `FieldType::from_dynamic` classifies as `Null`.
fn make_null() -> Dynamic {
Dynamic::UNIT
}
// Test helper: builds a Rhai array value from `items`.
fn make_array(items: Vec<Dynamic>) -> Dynamic {
Dynamic::from(rhai::Array::from(items))
}
// Test helper: builds a Rhai map value from key/value pairs.
fn make_map(pairs: Vec<(&str, Dynamic)>) -> Dynamic {
let mut map = rhai::Map::new();
for (k, v) in pairs {
map.insert(k.into(), v);
}
Dynamic::from(map)
}
// Each built-in value kind maps to its dedicated `FieldType` variant.
#[test]
fn test_field_type_classification() {
assert_eq!(
FieldType::from_dynamic(&make_string("hello")),
FieldType::String
);
assert_eq!(FieldType::from_dynamic(&make_int(42)), FieldType::Int);
assert_eq!(FieldType::from_dynamic(&make_float(2.5)), FieldType::Float);
assert_eq!(FieldType::from_dynamic(&make_bool(true)), FieldType::Bool);
assert_eq!(FieldType::from_dynamic(&make_null()), FieldType::Null);
assert_eq!(
FieldType::from_dynamic(&make_array(vec![])),
FieldType::Array
);
assert_eq!(FieldType::from_dynamic(&make_map(vec![])), FieldType::Map);
}
// Three observations with one repeat: every observation is counted, but the
// repeat adds neither a distinct value nor a sample.
#[test]
fn test_basic_field_profile() {
let mut profile = FieldProfile::new();
profile.observe(&make_string("hello"));
profile.observe(&make_string("world"));
profile.observe(&make_string("hello"));
assert_eq!(profile.seen_count, 3);
assert_eq!(profile.type_counts[&FieldType::String], 3);
let (card, exact) = profile.cardinality();
assert!(exact);
assert_eq!(card, 2); assert_eq!(profile.samples.len(), 2);
}
// A field holding two ints and one string counts each type separately and
// lists the more frequent type first.
#[test]
fn test_mixed_types() {
let mut profile = FieldProfile::new();
profile.observe(&make_int(200));
profile.observe(&make_int(404));
profile.observe(&make_string("N/A"));
assert_eq!(profile.seen_count, 3);
assert_eq!(profile.type_counts[&FieldType::Int], 2);
assert_eq!(profile.type_counts[&FieldType::String], 1);
let types = profile.types_by_frequency();
assert_eq!(types[0].0, FieldType::Int); }
// Null observations are counted by type but do not contribute distinct values.
#[test]
fn test_null_not_counted_in_cardinality() {
let mut profile = FieldProfile::new();
profile.observe(&make_null());
profile.observe(&make_null());
profile.observe(&make_string("value"));
assert_eq!(profile.seen_count, 3);
assert_eq!(profile.type_counts[&FieldType::Null], 2);
let (card, exact) = profile.cardinality();
assert!(exact);
assert_eq!(card, 1); }
// The int 42 and the string "42" are counted as two distinct values.
#[test]
fn test_int_vs_string_distinct_cardinality() {
let mut profile = FieldProfile::new();
profile.observe(&make_int(42));
profile.observe(&make_string("42"));
let (card, exact) = profile.cardinality();
assert!(exact);
assert_eq!(card, 2); }
// Arrays of length 2 and 5 produce the size range (2, 5) and no distinct values.
#[test]
fn test_array_size_range() {
let mut profile = FieldProfile::new();
profile.observe(&make_array(vec![make_int(1), make_int(2)]));
profile.observe(&make_array(vec![
make_int(1),
make_int(2),
make_int(3),
make_int(4),
make_int(5),
]));
assert_eq!(profile.array_size_range, Some((2, 5)));
let (card, _) = profile.cardinality();
assert_eq!(card, 0); }
// Maps with 1 and 3 entries produce the size range (1, 3).
#[test]
fn test_map_size_range() {
let mut profile = FieldProfile::new();
profile.observe(&make_map(vec![("a", make_int(1))]));
profile.observe(&make_map(vec![
("a", make_int(1)),
("b", make_int(2)),
("c", make_int(3)),
]));
assert_eq!(profile.map_size_range, Some((1, 3)));
}
// 300 distinct ints exceed EXACT_CARDINALITY_THRESHOLD (256), so the count is
// reported as an estimate; it is expected to land within 270..=300.
#[test]
fn test_hll_graduation() {
let mut profile = FieldProfile::new();
for i in 0..300 {
profile.observe(&make_int(i));
}
let (card, exact) = profile.cardinality();
assert!(!exact, "Should have graduated to HLL");
assert!(
(270..=300).contains(&card),
"HLL estimate {} out of range",
card
);
}
// An estimated cardinality never exceeds the number of scalar observations,
// and the cap does not drag it below half of that number.
#[test]
fn test_hll_estimate_clamped_to_scalar_observations() {
let mut profile = FieldProfile::new();
let n: usize = 400;
for i in 0..n {
profile.observe(&make_int(i as i64));
}
let (card, exact) = profile.cardinality();
assert!(!exact, "Should have graduated to HLL");
assert!(
card <= n,
"Cardinality {card} must not exceed scalar observation count {n}"
);
assert!(card > n / 2, "Clamped cardinality {card} unexpectedly low");
}
// The cap on an estimate uses scalar observations only: 100 extra nulls must
// not raise the limit from 300 to 400.
#[test]
fn test_hll_estimate_clamp_excludes_nulls() {
let mut profile = FieldProfile::new();
for i in 0..300 {
profile.observe(&make_int(i));
}
for _ in 0..100 {
profile.observe(&make_null());
}
let (card, exact) = profile.cardinality();
assert!(!exact);
assert!(
card <= 300,
"Cardinality {card} must be clamped to scalar count 300, \
not seen_count 400"
);
}
// Two events with overlapping fields: the event total, the number of distinct
// fields and the per-field observation counts are all tracked.
#[test]
fn test_field_discovery_basic() {
let mut discovery = FieldDiscovery::new();
let mut fields1 = IndexMap::new();
fields1.insert("level".to_string(), make_string("INFO"));
fields1.insert("message".to_string(), make_string("hello"));
fields1.insert("status".to_string(), make_int(200));
let mut fields2 = IndexMap::new();
fields2.insert("level".to_string(), make_string("ERROR"));
fields2.insert("message".to_string(), make_string("fail"));
discovery.observe_event(&fields1);
discovery.observe_event(&fields2);
assert_eq!(discovery.total_events, 2);
assert_eq!(discovery.fields.len(), 3);
assert_eq!(discovery.fields["level"].seen_count, 2);
assert_eq!(discovery.fields["status"].seen_count, 1);
}
// The rendered table names every field and its type and, with no summaries
// set, ends with the event-count footer.
#[test]
fn test_format_table_not_empty() {
let mut discovery = FieldDiscovery::new();
let mut fields = IndexMap::new();
fields.insert("level".to_string(), make_string("INFO"));
fields.insert("msg".to_string(), make_string("test"));
discovery.observe_event(&fields);
let table = discovery.format_table(true);
assert!(table.contains("1 events scanned"));
assert!(table.contains("level"));
assert!(table.contains("msg"));
assert!(table.contains("string"));
assert!(table.ends_with("1 events scanned"));
}
// Fixture: a discovery that has seen one event with a single `level` field.
fn discovery_with_one_field() -> FieldDiscovery {
let mut discovery = FieldDiscovery::new();
let mut fields = IndexMap::new();
fields.insert("level".to_string(), make_string("INFO"));
discovery.observe_event(&fields);
discovery
}
// An auto-detected single format is tagged "(auto-detected)" in the footer,
// with the Unicode or ASCII separator depending on `use_unicode`.
#[test]
fn test_footer_format_single_auto() {
let mut discovery = discovery_with_one_field();
discovery.format_summary = Some(FormatSummary {
format: "cef".to_string(),
detection: "auto",
counts: vec![],
unit: "",
});
let table = discovery.format_table(true);
assert!(table.contains("1 events scanned \u{b7} format: cef (auto-detected)"));
let ascii = discovery.format_table_for_width(80, false);
assert!(ascii.contains("1 events scanned | format: cef (auto-detected)"));
}
// A detection value without a tag ("explicit") adds no parenthesised tag.
#[test]
fn test_footer_format_explicit_has_no_tag() {
let mut discovery = discovery_with_one_field();
discovery.format_summary = Some(FormatSummary {
format: "line".to_string(),
detection: "explicit",
counts: vec![],
unit: "",
});
let table = discovery.format_table(true);
assert!(table.contains("format: line"));
assert!(!table.contains("auto-detected"));
assert!(!table.contains('('));
}
// With more than one tally the footer lists each format with its count,
// followed by the unit in parentheses.
#[test]
fn test_footer_format_multi_lists_counts_with_unit() {
let mut discovery = discovery_with_one_field();
discovery.format_summary = Some(FormatSummary {
format: "mixed".to_string(),
detection: "cascade",
counts: vec![("cef".to_string(), 12), ("json".to_string(), 3)],
unit: "events",
});
let table = discovery.format_table(true);
assert!(table.contains("formats: cef 12, json 3 (events)"));
}
/// Without a format summary the footer still reports the scan count but
/// mentions neither `format:` nor `formats:`.
#[test]
fn test_footer_format_absent_when_no_summary() {
    let rendered = discovery_with_one_field().format_table(true);

    assert!(rendered.contains("1 events scanned"));
    assert!(!rendered.contains("format:"));
    assert!(!rendered.contains("formats:"));
}
/// Builds a `FieldDiscovery` that has observed one event with two string
/// fields: `ts = "2024-01-01T00:00:00Z"` and `level = "INFO"`.
fn discovery_with_ts_field() -> FieldDiscovery {
    let mut event = IndexMap::new();
    event.insert("ts".to_owned(), make_string("2024-01-01T00:00:00Z"));
    event.insert("level".to_owned(), make_string("INFO"));

    let mut seeded = FieldDiscovery::new();
    seeded.observe_event(&event);
    seeded
}
/// The timestamp field is named in the footer only; no output line may carry
/// a `(ts)` marker.
#[test]
fn test_timestamp_footer_names_field_without_marking_rows() {
    let summary = TimestampSummary {
        field: String::from("ts"),
        overridden: false,
        detected: 1,
        parsed: 1,
    };
    let mut discovery = discovery_with_ts_field();
    discovery.timestamp_summary = Some(summary);

    let ascii = discovery.format_table_for_width(120, false);
    assert!(ascii.contains("| timestamp: ts"), "{ascii}");

    let any_row_marked = ascii.lines().any(|row| row.contains("(ts)"));
    assert!(
        !any_row_marked,
        "rows must not carry a marker: {ascii}"
    );
}
/// When the timestamp parsed, the footer points at `meta.parsed_ts`, using
/// `->` in the ASCII rendering and `→` in the Unicode rendering.
#[test]
fn test_timestamp_footer_points_at_meta_parsed_ts() {
    let summary = TimestampSummary {
        field: String::from("ts"),
        overridden: false,
        detected: 1,
        parsed: 1,
    };
    let mut discovery = discovery_with_ts_field();
    discovery.timestamp_summary = Some(summary);

    let ascii = discovery.format_table_for_width(120, false);
    let unicode = discovery.format_table_for_width(120, true);

    assert!(ascii.contains("timestamp: ts -> meta.parsed_ts"), "{ascii}");
    assert!(
        unicode.contains("timestamp: ts → meta.parsed_ts"),
        "{unicode}"
    );
}
/// If no timestamp was detected or parsed, the footer must not mention
/// `meta.parsed_ts`.
#[test]
fn test_timestamp_footer_omits_pointer_when_nothing_parsed() {
    let unparsed = TimestampSummary {
        field: String::from("nonexistent"),
        overridden: true,
        detected: 0,
        parsed: 0,
    };
    let mut discovery = discovery_with_ts_field();
    discovery.timestamp_summary = Some(unparsed);

    let table = discovery.format_table_for_width(120, false);
    let mentions_pointer = table.contains("meta.parsed_ts");
    assert!(!mentions_pointer, "{table}");
}
/// With 6 of 10 detected timestamps parsed, the footer annotates the field
/// with "(60% parsed)".
#[test]
fn test_timestamp_footer_flags_low_parse_rate() {
    let partly_parsed = TimestampSummary {
        field: String::from("ts"),
        overridden: false,
        detected: 10,
        parsed: 6,
    };
    let mut discovery = discovery_with_ts_field();
    discovery.timestamp_summary = Some(partly_parsed);

    let table = discovery.format_table_for_width(120, false);
    let shows_rate = table.contains("timestamp: ts (60% parsed)");
    assert!(shows_rate, "{table}");
}
/// An overridden timestamp field is tagged with "(--ts-field)" in the footer.
#[test]
fn test_timestamp_footer_marks_override() {
    let overridden = TimestampSummary {
        field: String::from("ts"),
        overridden: true,
        detected: 1,
        parsed: 1,
    };
    let mut discovery = discovery_with_ts_field();
    discovery.timestamp_summary = Some(overridden);

    let table = discovery.format_table_for_width(120, false);
    let shows_override = table.contains("timestamp: ts (--ts-field)");
    assert!(shows_override, "{table}");
}
/// An overridden timestamp field that was never detected is reported as
/// "(--ts-field, not found)".
#[test]
fn test_timestamp_footer_flags_missing_override() {
    let missing = TimestampSummary {
        field: String::from("nonexistent"),
        overridden: true,
        detected: 0,
        parsed: 0,
    };
    let mut discovery = discovery_with_ts_field();
    discovery.timestamp_summary = Some(missing);

    let table = discovery.format_table_for_width(120, false);
    let flags_missing = table.contains("timestamp: nonexistent (--ts-field, not found)");
    assert!(
        flags_missing,
        "{table}"
    );
}
/// Without a timestamp summary the output contains neither a `timestamp:`
/// footer entry nor a `(ts)` marker.
#[test]
fn test_timestamp_absent_keeps_footer_quiet() {
    let table = discovery_with_ts_field().format_table_for_width(120, false);

    let has_footer_entry = table.contains("timestamp:");
    assert!(!has_footer_entry, "{table}");

    let has_marker = table.contains("(ts)");
    assert!(!has_marker, "{table}");
}
/// The JSON output carries a top-level `timestamp` object and flags only the
/// timestamp field's entry with `"timestamp": true`.
#[test]
fn test_timestamp_json_keys() {
    // Finds the entry of the `fields` array whose `name` equals `name`;
    // panics if `fields` is not an array or no entry matches.
    fn field_named<'a>(doc: &'a serde_json::Value, name: &str) -> &'a serde_json::Value {
        doc["fields"]
            .as_array()
            .unwrap()
            .iter()
            .find(|entry| entry["name"] == name)
            .unwrap()
    }

    let summary = TimestampSummary {
        field: String::from("ts"),
        overridden: false,
        detected: 1,
        parsed: 1,
    };
    let mut discovery = discovery_with_ts_field();
    discovery.timestamp_summary = Some(summary);

    let rendered = discovery.format_json();
    let json: serde_json::Value = serde_json::from_str(&rendered).expect("valid json");

    let timestamp = &json["timestamp"];
    assert_eq!(timestamp["field"], "ts");
    assert_eq!(timestamp["source"], "auto");
    assert_eq!(timestamp["parsed"], 1);

    let ts_field = field_named(&json, "ts");
    assert_eq!(ts_field["timestamp"], true);

    // Fields other than the timestamp field must not have the key at all.
    let level_field = field_named(&json, "level");
    assert!(level_field.get("timestamp").is_none());
}
/// A multi-format summary is exported to JSON as `format`,
/// `format_detection`, per-format `format_counts`, and `format_count_unit`.
#[test]
fn test_format_json_includes_format_keys() {
    let per_format = vec![(String::from("cef"), 3), (String::from("json"), 1)];

    let mut discovery = discovery_with_one_field();
    discovery.format_summary = Some(FormatSummary {
        format: String::from("mixed"),
        detection: "per-file",
        counts: per_format,
        unit: "files",
    });

    let rendered = discovery.format_json();
    let json: serde_json::Value = serde_json::from_str(&rendered).expect("valid json");

    assert_eq!(json["format"], "mixed");
    assert_eq!(json["format_detection"], "per-file");

    let counts = &json["format_counts"];
    assert_eq!(counts["cef"], 3);
    assert_eq!(counts["json"], 1);

    assert_eq!(json["format_count_unit"], "files");
}
/// A single-format summary with no counts exports `format` and
/// `format_detection` but leaves `format_counts` out of the JSON entirely.
#[test]
fn test_format_json_omits_counts_for_single_format() {
    let single = FormatSummary {
        format: String::from("cef"),
        detection: "auto",
        counts: Vec::new(),
        unit: "",
    };
    let mut discovery = discovery_with_one_field();
    discovery.format_summary = Some(single);

    let rendered = discovery.format_json();
    let json: serde_json::Value = serde_json::from_str(&rendered).expect("valid json");

    assert_eq!(json["format"], "cef");
    assert_eq!(json["format_detection"], "auto");
    assert!(json.get("format_counts").is_none());
}
/// At width 56 the table shows the Field/Type/Seen/Miss/Uniq headers and does
/// not contain the labelled " examples: " / " seen: " text.
#[test]
fn test_format_table_compact_layout_on_medium_width() {
    let mut event = IndexMap::new();
    event.insert(
        String::from("very.long.field.name"),
        make_string("this is a long example value"),
    );

    let mut discovery = FieldDiscovery::new();
    discovery.observe_event(&event);

    let rendered = discovery.format_table_for_width(56, true);

    // Column headers that must be present.
    assert!(rendered.contains("Field"));
    assert!(rendered.contains("Type"));
    assert!(rendered.contains("Seen"));
    assert!(rendered.contains("Miss"));
    assert!(rendered.contains("Uniq"));

    // Labels that must be absent at this width.
    assert!(!rendered.contains(" examples: "));
    assert!(!rendered.contains(" seen: "));
}
/// At width 38 the output still shows the field name, its sample, the seen
/// count and the miss percentage, with the example either labelled inline or
/// on its own line.
#[test]
fn test_format_table_narrow_layout_on_small_width() {
    let mut event = IndexMap::new();
    event.insert(String::from("request_id"), make_string("req_001"));

    let mut discovery = FieldDiscovery::new();
    discovery.observe_event(&event);

    let table = discovery.format_table_for_width(38, true);
    assert!(table.contains("request_id"));
    assert!(table.contains("req_001"));
    assert!(table.contains("1"));
    assert!(table.contains("0%"));

    // Either presentation of the example is accepted.
    let labelled_inline = table.contains(" examples: \"req_001\"");
    let on_own_line = table.lines().any(|row| row.starts_with(" \"req_001\""));
    assert!(
        labelled_inline || on_own_line,
        "{table}"
    );
}
/// The JSON export of a one-field discovery reports the event total and the
/// field's name, seen count, exact-cardinality flag and first sample.
#[test]
fn test_format_json() {
    let mut event = IndexMap::new();
    event.insert(String::from("level"), make_string("INFO"));

    let mut discovery = FieldDiscovery::new();
    discovery.observe_event(&event);

    let rendered = discovery.format_json();
    let parsed: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    assert_eq!(parsed["total_events"], 1);

    let first_field = &parsed["fields"][0];
    assert_eq!(first_field["name"], "level");
    assert_eq!(first_field["seen"], 1);
    assert_eq!(first_field["cardinality"]["exact"], true);
    assert_eq!(first_field["samples"][0], "INFO");
}
/// A discovery that observed nothing renders a "Scanned 0 events" /
/// "no fields found" message.
#[test]
fn test_empty_discovery() {
    let rendered = FieldDiscovery::new().format_table(true);

    assert!(rendered.contains("Scanned 0 events"));
    assert!(rendered.contains("no fields found"));
}
/// After observing 20 distinct values a profile holds exactly `MAX_SAMPLES`
/// samples.
#[test]
fn test_sample_limit() {
    let mut profile = FieldProfile::new();
    for index in 0..20 {
        let text = format!("value_{}", index);
        profile.observe(&make_string(&text));
    }

    let kept = profile.samples.len();
    assert_eq!(kept, MAX_SAMPLES);
}
/// Across 40 independent profiles, each fed 1000 copies of "common" and then
/// 20 distinct "rare_*" values, the retained samples must contain more than
/// `trials * 4` rare values in total.
#[test]
fn test_reservoir_sees_rare_values() {
    let trials = 40;

    let total_rare_in_samples: usize = (0..trials)
        .map(|_| {
            // Every trial starts from a fresh profile.
            let mut profile = FieldProfile::new();
            for _ in 0..1000 {
                profile.observe(&make_string("common"));
            }
            for i in 0..20 {
                profile.observe(&make_string(&format!("rare_{i}")));
            }

            // Count the string samples that are one of the rare values.
            profile
                .samples
                .iter()
                .filter(|sample| {
                    matches!(sample.as_str(), Some(text) if text.starts_with("rare_"))
                })
                .count()
        })
        .sum();

    assert!(
        total_rare_in_samples > trials * 4,
        "reservoir should surface rare distinct values; got {total_rare_in_samples} across {trials} trials",
    );
}
/// `scalar_to_json` maps string, int, float, bool, null and char inputs to
/// the matching JSON value (a char becomes a one-character string).
#[test]
fn test_scalar_to_json_preserves_scalar_types() {
    let from_string = scalar_to_json(&make_string("hello"));
    assert_eq!(from_string, serde_json::json!("hello"));

    let from_int = scalar_to_json(&make_int(42));
    assert_eq!(from_int, serde_json::json!(42));

    let from_float = scalar_to_json(&make_float(2.5));
    assert_eq!(from_float, serde_json::json!(2.5));

    let from_bool = scalar_to_json(&make_bool(true));
    assert_eq!(from_bool, serde_json::json!(true));

    let from_null = scalar_to_json(&make_null());
    assert_eq!(from_null, serde_json::Value::Null);

    let from_char = scalar_to_json(&Dynamic::from('x'));
    assert_eq!(from_char, serde_json::json!("x"));
}
/// A string containing a newline, tab, quotes and a backslash converts to a
/// JSON string with the same content.
#[test]
fn test_scalar_to_json_preserves_escaped_strings() {
    let input = make_string("line1\nline2\t\"quoted\"\\backslash");
    let expected = serde_json::json!("line1\nline2\t\"quoted\"\\backslash");

    assert_eq!(scalar_to_json(&input), expected);
}
/// `format_examples` quotes strings, leaves ints and bools unquoted, and does
/// not change the profile's stored samples.
#[test]
fn test_format_examples_renders_typed_samples_without_mutating_them() {
    let mut profile = FieldProfile::new();
    profile.observe(&make_string("hello"));
    profile.observe(&make_int(42));
    profile.observe(&make_bool(true));

    // Snapshot taken before formatting, compared against afterwards.
    let before = profile.samples.clone();
    let examples = format_examples(&profile, "\u{2026}");

    let has_quoted_string = examples.contains("\"hello\"");
    assert!(
        has_quoted_string,
        "string sample should render quoted: {examples}"
    );

    let has_int = examples.contains("42");
    assert!(
        has_int,
        "int sample should render: {examples}"
    );

    let has_quoted_int = examples.contains("\"42\"");
    assert!(
        !has_quoted_int,
        "int sample should not be quoted: {examples}"
    );

    let has_bool = examples.contains("true");
    assert!(
        has_bool,
        "bool sample should render: {examples}"
    );

    assert_eq!(
        profile.samples, before,
        "display formatting must not mutate samples"
    );
}
/// `format_examples` renders the empty string as `""` and escapes newlines
/// and tabs inside quoted strings.
#[test]
fn test_format_examples_quotes_and_escapes_strings() {
    let mut profile = FieldProfile::new();
    for raw in ["", "a\nb", "tab\there"] {
        profile.observe(&make_string(raw));
    }

    let examples = format_examples(&profile, "\u{2026}");

    let shows_empty = examples.contains("\"\"");
    assert!(
        shows_empty,
        "empty string should render as \"\": {examples}"
    );

    let shows_escaped_newline = examples.contains("\"a\\nb\"");
    assert!(
        shows_escaped_newline,
        "newlines should be escaped inside quoted strings: {examples}"
    );

    let shows_escaped_tab = examples.contains("\"tab\\there\"");
    assert!(
        shows_escaped_tab,
        "tabs should be escaped inside quoted strings: {examples}"
    );
}
/// A 200-character string is stored as a sample in full and rendered by
/// `format_examples` in full, wrapped in quotes.
#[test]
fn test_long_string_samples_preserved_in_format_examples() {
    let long = "x".repeat(200);

    let mut profile = FieldProfile::new();
    profile.observe(&make_string(&long));

    // Exactly one sample, equal to the whole input string.
    assert_eq!(profile.samples.len(), 1);
    assert_eq!(profile.samples[0], serde_json::json!(long));

    let rendered = format_examples(&profile, "\u{2026}");
    let expected = format!("\"{long}\"");
    assert_eq!(rendered, expected);
}
/// With a 300-character sample at width 40, the examples line must have a
/// display width of `width - 1` or `width`.
#[test]
fn test_compact_examples_line_fills_terminal_width() {
    let width = 40;
    let long = "y".repeat(300);

    let mut event = IndexMap::new();
    event.insert(String::from("sample"), make_string(&long));

    let mut discovery = FieldDiscovery::new();
    discovery.observe_event(&event);

    let table = discovery.format_table_for_width(width, true);

    // The examples line is the first line starting with a space and a quote.
    let Some(examples_line) = table.lines().find(|row| row.starts_with(" \"")) else {
        panic!("expected indented examples line in compact layout:\n{table}")
    };

    let line_width = display_width(examples_line);
    let fills_width = ((width - 1)..=width).contains(&line_width);
    assert!(
        fills_width,
        "compact examples line should fill terminal width {width}, got {line_width}: {examples_line}"
    );
}
/// Eight distinct tag samples render to more than 60 characters, and every
/// tag appears quoted in the result.
#[test]
fn test_format_examples_not_capped_at_60_chars() {
    let tags = [
        "alpha_tag",
        "bravo_tag",
        "charlie_tag",
        "delta_tag",
        "echo_tag",
        "foxtrot_tag",
        "golf_tag",
        "hotel_tag",
    ];

    let mut profile = FieldProfile::new();
    for tag in tags {
        let value = make_string(tag);
        profile.observe(&value);
    }

    let examples = format_examples(&profile, "\u{2026}");

    let rendered_chars = examples.chars().count();
    assert!(
        rendered_chars > 60,
        "examples should not be capped at 60 chars: {examples}"
    );

    for tag in tags {
        let quoted = format!("\"{tag}\"");
        assert!(
            examples.contains(&quoted),
            "sample {tag:?} missing from examples: {examples}"
        );
    }
}
/// A three-value profile renders without a trailing marker, while a profile
/// fed `MAX_SAMPLES + 5` values ends with ", " plus the supplied marker.
#[test]
fn test_examples_marks_non_exhaustive_sample_lists() {
    // Three values only: the rendered list must not end with the marker.
    let mut few_values = FieldProfile::new();
    for text in ["a", "b", "c"] {
        few_values.observe(&make_string(text));
    }
    let examples = format_examples(&few_values, "\u{2026}");
    let ends_with_marker = examples.ends_with('\u{2026}');
    assert!(
        !ends_with_marker,
        "exhaustive sample list must not be marked: {examples}"
    );

    // More values than MAX_SAMPLES: the rendered list must end with the marker.
    let mut many_values = FieldProfile::new();
    for i in 0..(MAX_SAMPLES + 5) {
        let text = format!("v{i}");
        many_values.observe(&make_string(&text));
    }
    let examples = format_examples(&many_values, "\u{2026}");
    let ends_with_marker = examples.ends_with(", \u{2026}");
    assert!(
        ends_with_marker,
        "truncated sample list must end with the marker: {examples}"
    );

    // The same profile rendered with the three-dot ASCII marker.
    let ascii = format_examples(&many_values, "...");
    let ends_with_dots = ascii.ends_with(", ...");
    assert!(
        ends_with_dots,
        "ASCII marker expected under --no-emoji: {ascii}"
    );
}
/// With the Unicode flag off, the table contains neither `…` nor `—` and uses
/// "..." and " - " instead.
#[test]
fn test_format_table_ascii_fallback_glyphs() {
    let long = "z".repeat(120);

    let mut event = IndexMap::new();
    event.insert(
        String::from("meta"),
        make_map(vec![("k", make_string("v"))]),
    );
    event.insert(String::from("blob"), make_string(&long));

    let mut discovery = FieldDiscovery::new();
    discovery.observe_event(&event);

    let table = discovery.format_table_for_width(60, false);

    let has_unicode_glyph = table.contains('\u{2026}') || table.contains('\u{2014}');
    assert!(
        !has_unicode_glyph,
        "ASCII output must not contain Unicode glyphs: {table}"
    );

    let has_ascii_ellipsis = table.contains("...");
    assert!(
        has_ascii_ellipsis,
        "ASCII truncation marker expected: {table}"
    );

    let has_ascii_dash = table.contains(" - ");
    assert!(
        has_ascii_dash,
        "ASCII em-dash fallback expected: {table}"
    );
}
/// At width 200 every one of eight example values appears in full; at width
/// 60 the output contains the `…` truncation glyph.
#[test]
fn test_format_table_uses_wide_terminal_for_examples() {
    let tags = [
        "alpha_tag",
        "bravo_tag",
        "charlie_tag",
        "delta_tag",
        "echo_tag",
        "foxtrot_tag",
        "golf_tag",
        "hotel_tag",
    ];

    // One event per tag, each holding a single `tag` field.
    let mut discovery = FieldDiscovery::new();
    for tag in tags {
        let mut event = IndexMap::new();
        event.insert(String::from("tag"), make_string(tag));
        discovery.observe_event(&event);
    }

    let table = discovery.format_table_for_width(200, true);
    for tag in tags {
        let quoted = format!("\"{tag}\"");
        assert!(
            table.contains(&quoted),
            "wide table should include full example {tag:?}: {table}"
        );
    }

    let table = discovery.format_table_for_width(60, true);
    let was_truncated = table.contains('\u{2026}');
    assert!(
        was_truncated,
        "narrow table should truncate examples: {table}"
    );
}
/// A map-valued field is tracked under its own name and its entries are
/// tracked under dotted paths (`user.name`, `user.age`).
#[test]
fn test_nested_flattening() {
    let user = make_map(vec![("name", make_string("alice")), ("age", make_int(30))]);

    let mut event = IndexMap::new();
    event.insert(String::from("user"), user);
    event.insert(String::from("level"), make_string("INFO"));

    let mut discovery = FieldDiscovery::new();
    discovery.observe_event(&event);

    let tracked = &discovery.fields;
    assert!(tracked.contains_key("user"));
    assert!(tracked.contains_key("user.name"));
    assert!(tracked.contains_key("user.age"));
    assert_eq!(tracked["user.name"].seen_count, 1);
    assert_eq!(tracked["user.age"].type_counts[&FieldType::Int], 1);
}
/// An array-valued field is tracked under its own name and its elements under
/// `name[]`, counted once per element.
#[test]
fn test_array_element_flattening() {
    let roles = make_array(vec![make_string("admin"), make_string("dev")]);

    let mut event = IndexMap::new();
    event.insert(String::from("roles"), roles);

    let mut discovery = FieldDiscovery::new();
    discovery.observe_event(&event);

    assert!(discovery.fields.contains_key("roles"));
    assert!(discovery.fields.contains_key("roles[]"));

    let elements = &discovery.fields["roles[]"];
    assert_eq!(elements.seen_count, 2);

    // The second tuple member returned by `cardinality()` is not checked here.
    let (distinct, _) = elements.cardinality();
    assert_eq!(distinct, 2);
}
/// For array elements `seen_count` counts elements while `events_seen` counts
/// events; the rendered `tags[]` row must show 1 and a 50% miss rate.
#[test]
fn test_array_seen_is_per_event_not_per_element() {
    // First event: a three-element `tags` array plus `level`.
    let mut with_tags = IndexMap::new();
    with_tags.insert(
        String::from("tags"),
        make_array(vec![make_string("a"), make_string("b"), make_string("c")]),
    );
    with_tags.insert(String::from("level"), make_string("INFO"));

    // Second event: `level` only.
    let mut without_tags = IndexMap::new();
    without_tags.insert(String::from("level"), make_string("WARN"));

    let mut discovery = FieldDiscovery::new();
    discovery.observe_event(&with_tags);
    discovery.observe_event(&without_tags);

    let tracked = &discovery.fields;
    assert_eq!(tracked["tags[]"].seen_count, 3);
    assert_eq!(tracked["tags[]"].events_seen, 1);
    assert_eq!(tracked["tags"].events_seen, 1);
    assert_eq!(tracked["level"].events_seen, 2);

    let rendered = discovery.format_table(false);
    let tags_elem_row = rendered
        .lines()
        .find(|row| row.trim_start().starts_with("tags[]"))
        .expect("tags[] row");

    let shows_one_seen = tags_elem_row.contains(" 1 ");
    let shows_half_missing = tags_elem_row.contains("50%");
    assert!(
        shows_one_seen && shows_half_missing,
        "expected per-event Seen=1 and 50% miss: {tags_elem_row}"
    );
}
/// With the default discovery, `a.b.c.d.e` is tracked down to `a.b.c` only,
/// and both the table and the JSON report the depth cap of 3.
#[test]
fn test_depth_limit() {
    // Build the nested value a -> b -> c -> d -> e from the inside out.
    let below_d = make_map(vec![("e", make_string("bottom"))]);
    let below_c = make_map(vec![("d", below_d)]);
    let below_b = make_map(vec![("c", below_c)]);
    let deep = make_map(vec![("b", below_b)]);

    let mut event = IndexMap::new();
    event.insert(String::from("a"), deep);

    let mut discovery = FieldDiscovery::new();
    discovery.observe_event(&event);

    let tracked = &discovery.fields;
    assert!(tracked.contains_key("a"));
    assert!(tracked.contains_key("a.b"));
    assert!(tracked.contains_key("a.b.c"));
    assert!(!tracked.contains_key("a.b.c.d"));
    assert!(!tracked.contains_key("a.b.c.d.e"));

    let table = discovery.format_table(true);
    let notes_cap = table.contains("Nested field flattening stopped at depth 3");
    assert!(
        notes_cap,
        "table should make depth cap explicit: {table}"
    );

    let rendered = discovery.format_json();
    let parsed: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    assert_eq!(parsed["flatten_depth_limit"], 3);
    assert_eq!(parsed["flatten_depth_capped"], true);
}
/// With `FieldDiscovery::with_depth(0)` every level down to `a.b.c.d.e` is
/// tracked, no depth-cap note is printed, and the JSON reports limit 0,
/// uncapped.
#[test]
fn test_depth_limit_unlimited() {
    // Build the nested value a -> b -> c -> d -> e from the inside out.
    let below_d = make_map(vec![("e", make_string("bottom"))]);
    let below_c = make_map(vec![("d", below_d)]);
    let below_b = make_map(vec![("c", below_c)]);
    let deep = make_map(vec![("b", below_b)]);

    let mut event = IndexMap::new();
    event.insert(String::from("a"), deep);

    let mut discovery = FieldDiscovery::with_depth(0);
    discovery.observe_event(&event);

    let tracked = &discovery.fields;
    assert!(tracked.contains_key("a"));
    assert!(tracked.contains_key("a.b"));
    assert!(tracked.contains_key("a.b.c"));
    assert!(tracked.contains_key("a.b.c.d"));
    assert!(tracked.contains_key("a.b.c.d.e"));

    let table = discovery.format_table(true);
    let notes_cap = table.contains("Nested field flattening stopped");
    assert!(
        !notes_cap,
        "unlimited depth should not emit a depth-cap note: {table}"
    );

    let rendered = discovery.format_json();
    let parsed: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    assert_eq!(parsed["flatten_depth_limit"], 0);
    assert_eq!(parsed["flatten_depth_capped"], false);
}
/// An element count (3) larger than the event count (1) must still render as
/// a table and as JSON without panicking.
#[test]
fn test_array_seen_exceeds_events_does_not_panic() {
    let tags = make_array(vec![make_string("a"), make_string("b"), make_string("c")]);

    let mut event = IndexMap::new();
    event.insert(String::from("tags"), tags);

    let mut discovery = FieldDiscovery::new();
    discovery.observe_event(&event);

    assert_eq!(discovery.fields["tags[]"].seen_count, 3);
    assert_eq!(discovery.total_events, 1);

    let rendered_table = discovery.format_table(true);
    assert!(rendered_table.contains("tags[]"));

    let rendered_json = discovery.format_json();
    let parsed: serde_json::Value = serde_json::from_str(&rendered_json).unwrap();
    assert_eq!(parsed["truncated"], false);
    assert_eq!(parsed["flatten_depth_limit"], 3);
    assert_eq!(parsed["flatten_depth_capped"], false);
}
}