use std::sync::atomic::{AtomicU64, Ordering};
use serde_json::Value;
/// Newtype wrapper around a value of type `T`.
///
/// The inner field is private, so code outside this module cannot build an `Extracted` with a
/// tuple-struct literal; within this file every instance is produced through `wrap`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Extracted<T>(T);
impl<T> Extracted<T> {
#[allow(clippy::should_implement_trait)]
pub fn as_ref(&self) -> &T {
&self.0
}
#[cfg(test)]
pub fn from_test_value(value: T) -> Self {
wrap(value)
}
pub(crate) fn from_stored(value: T) -> Self {
wrap(value)
}
}
impl<T> std::ops::Deref for Extracted<T> {
type Target = T;
fn deref(&self) -> &T {
&self.0
}
}
/// Module-private constructor: places `value` inside an `Extracted` without inspecting it.
fn wrap<T>(value: T) -> Extracted<T> {
    Extracted::<T>(value)
}
/// Largest byte length a single string may have before `bound_str` replaces it with a truncated,
/// marked copy: 10 * 1024 * 1024 bytes (10 MiB).
pub(crate) const LEAF_CAP: usize = 10 * 1024 * 1024;
/// Running count of truncations; incremented by `record_truncation` and read by
/// `truncated_values_count`. Starts at zero.
static TRUNCATED_VALUES: AtomicU64 = AtomicU64::new(0);
pub(crate) fn truncated_values_count() -> u64 {
TRUNCATED_VALUES.load(Ordering::Relaxed)
}
/// Records that one value was truncated.
///
/// Adds one to `TRUNCATED_VALUES` and emits a `tracing` debug event carrying the value's original
/// size (`original_bytes`) and the cap in force (`cap_bytes`).
fn record_truncation(original_bytes: usize) {
    let _count_before = TRUNCATED_VALUES.fetch_add(1, Ordering::Relaxed);
    tracing::debug!(
        original_bytes = original_bytes,
        cap_bytes = LEAF_CAP,
        "value exceeded the seam leaf cap; truncated to a marked sentinel"
    );
}
/// Builds the sentinel text appended to a truncated value.
///
/// The marker has the form `<pond:truncated N bytes>`, where `N` is `original_bytes` in decimal.
fn truncation_marker(original_bytes: usize) -> String {
    let size = original_bytes.to_string();
    ["<pond:truncated ", size.as_str(), " bytes>"].concat()
}
/// Builds the capped replacement for an oversize value: a prefix of `head` followed by the
/// truncation marker, with the whole result kept within `LEAF_CAP` bytes.
///
/// * `head` — the leading bytes of the value. It may be the entire value or only a prefix, and it
///   may be shorter than the space available before the marker.
/// * `original` — the byte length reported inside the marker and passed to `record_truncation`.
///
/// The prefix is cut so that a multi-byte UTF-8 character is never split. Bytes that are not valid
/// UTF-8 are decoded lossily (each invalid sequence becomes U+FFFD). Every call records one
/// truncation.
pub(crate) fn truncate_to_marker(head: &[u8], original: usize) -> String {
    let marker = truncation_marker(original);
    // Bytes left for payload once the marker has been appended.
    let budget = LEAF_CAP.saturating_sub(marker.len());
    // `head` may be shorter than the budget; clamp so that neither the probe below nor the slice
    // can reach past its end.
    let mut end = budget.min(head.len());
    // If the cut lands inside `head`, back off while the first excluded byte is a UTF-8
    // continuation byte (0b10xx_xxxx), i.e. while the cut would split a character.
    while end > 0 && end < head.len() && head[end] & 0xC0 == 0x80 {
        end -= 1;
    }
    let mut capped = String::from_utf8_lossy(&head[..end]).into_owned();
    // Lossy decoding replaces each invalid sequence with the three-byte U+FFFD, which can grow the
    // text beyond the budget; trim back to the nearest char boundary so the cap still holds.
    if capped.len() > budget {
        let mut cut = budget;
        while !capped.is_char_boundary(cut) {
            cut -= 1;
        }
        capped.truncate(cut);
    }
    capped.push_str(&marker);
    record_truncation(original);
    capped
}
/// Enforces `LEAF_CAP` on a string in place.
///
/// A string of at most `LEAF_CAP` bytes is left untouched. A longer one is replaced by the result
/// of `truncate_to_marker`. Returns `true` exactly when the string was replaced.
pub(crate) fn bound_str(s: &mut String) -> bool {
    let original_len = s.len();
    let oversize = original_len > LEAF_CAP;
    if oversize {
        *s = truncate_to_marker(s.as_bytes(), original_len);
    }
    oversize
}
/// Applies `bound_str` to every string found anywhere inside `value`, in place.
///
/// Arrays and objects are walked recursively; object keys are not touched. Nulls, booleans and
/// numbers are left as they are.
pub(crate) fn bound_value(value: &mut Value) {
    match value {
        Value::Null | Value::Bool(_) | Value::Number(_) => {}
        Value::String(text) => {
            bound_str(text);
        }
        Value::Array(elements) => {
            for element in elements {
                bound_value(element);
            }
        }
        Value::Object(fields) => {
            for child in fields.values_mut() {
                bound_value(child);
            }
        }
    }
}
pub fn extract_raw_record(row: &Value) -> Value {
let mut bounded = row.clone();
bound_value(&mut bounded);
bounded
}
/// Serializes transparently: the output is exactly what the wrapped value would produce.
impl<T> serde::Serialize for Extracted<T>
where
    T: serde::Serialize,
{
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let Self(inner) = self;
        T::serialize(inner, serializer)
    }
}
impl<'de, T: serde::Deserialize<'de>> serde::Deserialize<'de> for Extracted<T> {
fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
T::deserialize(deserializer).map(wrap)
}
}
/// Read-only, key-based view over a record, consumed by the `extract_*` functions in this file.
///
/// The `serde_json::Value` implementation in this file resolves every key with `Value::get`.
pub trait Source {
/// Returns the string stored under `key`.
///
/// The `Value` implementation yields `None` both when the key is missing and when the value
/// under it is not a string.
fn str_field(&self, key: &str) -> Option<&str>;
/// Returns the boolean stored under `key`.
///
/// The `Value` implementation yields `None` when the key is missing or the value is not a
/// boolean.
fn bool_field(&self, key: &str) -> Option<bool>;
/// Returns the raw JSON value stored under `key`, whatever its type, or `None` if there is none.
fn value_field(&self, key: &str) -> Option<&Value>;
/// Returns the value under `key` as a `Source` of its own, so lookups can descend into it.
fn nested(&self, key: &str) -> Option<&dyn Source>;
/// Returns the source itself as a string, if it is one. The default implementation returns
/// `None`.
fn as_str(&self) -> Option<&str> {
None
}
/// Returns a string rendering of the whole source; the `Value` implementation serializes it to
/// JSON text.
fn compact_repr(&self) -> String;
}
/// Reads the string field `key` from `source`, returning an owned, size-bounded copy.
///
/// Returns `None` when `Source::str_field` has nothing for `key`. A string longer than the leaf
/// cap comes back truncated and marked.
pub fn extract_str(source: &dyn Source, key: &str) -> Option<Extracted<String>> {
    let raw = source.str_field(key)?;
    let mut text = String::from(raw);
    bound_str(&mut text);
    Some(wrap(text))
}
pub fn extract_bool(source: &dyn Source, key: &str) -> Option<Extracted<bool>> {
source.bool_field(key).map(wrap)
}
/// Reads the JSON value under `key` from `source`, returning a size-bounded deep copy.
///
/// Returns `None` when `Source::value_field` has nothing for `key`. Every string inside the copy
/// is passed through the leaf cap.
pub fn extract_value(source: &dyn Source, key: &str) -> Option<Extracted<Value>> {
    let mut copy = source.value_field(key)?.clone();
    bound_value(&mut copy);
    Some(wrap(copy))
}
/// Reads `source` itself as a string, returning an owned, size-bounded copy.
///
/// Returns `None` when `Source::as_str` reports that the source is not a string.
pub fn extract_self_str(source: &dyn Source) -> Option<Extracted<String>> {
    let raw = source.as_str()?;
    let mut text = String::from(raw);
    bound_str(&mut text);
    Some(wrap(text))
}
/// Returns the size-bounded string rendering of `source`, as produced by `Source::compact_repr`.
pub fn extract_compact_repr(source: &dyn Source) -> Extracted<String> {
    let mut rendered = source.compact_repr();
    // The truncation flag is not needed here; only the bounded text is returned.
    let _was_truncated = bound_str(&mut rendered);
    wrap(rendered)
}
/// `Source` backed directly by a `serde_json::Value`; every key is resolved with `Value::get`.
impl Source for Value {
    /// The string under `key`, or `None` if the key is missing or holds a non-string.
    fn str_field(&self, key: &str) -> Option<&str> {
        let field = self.get(key)?;
        Value::as_str(field)
    }

    /// The boolean under `key`, or `None` if the key is missing or holds a non-boolean.
    fn bool_field(&self, key: &str) -> Option<bool> {
        let field = self.get(key)?;
        Value::as_bool(field)
    }

    /// The raw value under `key`, of any JSON type.
    fn value_field(&self, key: &str) -> Option<&Value> {
        Value::get(self, key)
    }

    /// The value under `key`, viewed as a `Source` in its own right.
    fn nested(&self, key: &str) -> Option<&dyn Source> {
        let child: &dyn Source = self.get(key)?;
        Some(child)
    }

    /// This value as a string slice, if it is a JSON string.
    fn as_str(&self) -> Option<&str> {
        // Path-qualified so it is clear the inherent `serde_json` accessor is meant, not this
        // trait method.
        Value::as_str(self)
    }

    /// This value serialized to JSON text; falls back to `"{}"` if serialization reports an error.
    fn compact_repr(&self) -> String {
        match serde_json::to_string(self) {
            Ok(json) => json,
            Err(_) => "{}".to_owned(),
        }
    }
}
// Unit tests for the extraction helpers, the `Source` implementation for `serde_json::Value`,
// and the leaf-cap bounding of JSON values.
#[cfg(test)]
mod tests {
// `expect`/`unwrap` are acceptable in tests: a failure should abort the test with a message.
#![allow(clippy::expect_used, clippy::unwrap_used)]
use super::*;
use serde_json::json;
// A present string field is returned and is readable through both `Deref` and `as_ref`.
#[test]
fn extract_str_pulls_present_field_and_wraps() {
let row = json!({"name": "Edit", "input": {"file": "/tmp/foo"}});
let extracted = extract_str(&row, "name").expect("name is present");
assert_eq!(&*extracted, "Edit", "Deref exposes the inner string");
assert_eq!(extracted.as_ref(), "Edit", "as_ref does too");
}
// A missing key yields `None`.
#[test]
fn extract_str_returns_none_when_field_absent() {
let row = json!({"name": "Edit"});
assert!(extract_str(&row, "missing").is_none());
}
// A key holding a non-string value also yields `None`.
#[test]
fn extract_str_returns_none_when_field_is_not_a_string() {
let row = json!({"name": 42});
assert!(
extract_str(&row, "name").is_none(),
"wrong-type fields surface as absence; adapters that care should branch on value_field first",
);
}
// Boolean and arbitrary-value extraction return the stored data unchanged when it is small.
#[test]
fn extract_bool_and_extract_value_round_trip() {
let row = json!({
"is_error": true,
"input": {"k": "v"},
});
let is_error = extract_bool(&row, "is_error").expect("present");
assert!(*is_error);
let params = extract_value(&row, "input").expect("present");
assert_eq!(&*params, &json!({"k": "v"}));
}
// `Extracted<String>` serializes as a bare JSON string and deserializes back to the same text.
#[test]
fn extracted_serde_round_trip_preserves_value() {
let extracted = extract_str(&json!({"k": "hello"}), "k").unwrap();
let encoded = serde_json::to_string(&extracted).unwrap();
assert_eq!(encoded, "\"hello\"");
let decoded: Extracted<String> = serde_json::from_str(&encoded).unwrap();
assert_eq!(&*decoded, "hello");
}
// `nested` returns a child `Source` whose fields can be read; a missing key yields `None`.
#[test]
fn source_impl_for_value_walks_nested_objects() {
let row = json!({"message": {"role": "user", "content": "hi"}});
let nested = row.nested("message").expect("message is an object");
assert_eq!(nested.str_field("role"), Some("user"));
assert_eq!(nested.str_field("content"), Some("hi"));
assert!(row.nested("missing").is_none());
}
// Oversize strings are capped wherever they sit (top level, nested object, array element),
// while small neighbouring strings are left untouched.
#[test]
fn bound_value_caps_every_position_and_spares_good_leaves() {
// 100 bytes over the cap, so each copy must be truncated.
let oversize = "x".repeat(LEAF_CAP + 100);
let mut value = json!({
"first": oversize,
"good_a": "ok",
"middle": oversize,
"good_b": "ok",
"nested": {"deep": oversize, "kept": "ok"},
"list": ["ok", oversize, "ok"],
"last": oversize,
});
bound_value(&mut value);
// Tail of the truncation marker, which reports the original byte length.
let marker = format!("{} bytes>", LEAF_CAP + 100);
// A capped leaf fits within the cap and ends with the marker.
let capped = |v: &Value| {
let text = v.as_str().expect("string leaf");
text.len() <= LEAF_CAP && text.ends_with(&marker)
};
let intact = |v: &Value| v.as_str() == Some("ok");
for path in [&value["first"], &value["middle"], &value["last"]] {
assert!(capped(path));
}
assert!(capped(&value["nested"]["deep"]));
assert!(capped(&value["list"][1]));
assert!(intact(&value["good_a"]));
assert!(intact(&value["good_b"]));
assert!(intact(&value["nested"]["kept"]));
assert!(intact(&value["list"][0]));
assert!(intact(&value["list"][2]));
}
}