#[cfg(test)]
mod tests {
use std::collections::HashMap;
/// A single attribute entry: an owned key paired with an owned value.
#[derive(Debug, Clone, PartialEq)]
pub struct KeyValue {
    /// Attribute name.
    pub key: String,
    /// Attribute value.
    pub value: String,
}

impl KeyValue {
    /// Creates a `KeyValue` from anything convertible into `String`.
    ///
    /// Both arguments are converted with `Into<String>`, so `&str` and
    /// `String` can be passed interchangeably.
    pub fn new(key: impl Into<String>, value: impl Into<String>) -> Self {
        let key: String = key.into();
        let value: String = value.into();
        KeyValue { key, value }
    }
}
/// Converts an attribute map into a list of `KeyValue` entries in a
/// deterministic order.
///
/// Entries whose key or value is empty are dropped. The survivors are
/// ordered by key, then by value.
fn current_ordered_proto_attributes(attributes: &HashMap<String, String>) -> Vec<KeyValue> {
    // Discard incomplete entries up front; removing them before or after the
    // sort yields the same final sequence.
    let mut pairs: Vec<(&String, &String)> = attributes
        .iter()
        .filter(|(name, content)| !(name.is_empty() || content.is_empty()))
        .collect();

    // Tuples compare lexicographically: key first, value as the tie-breaker.
    pairs.sort_unstable();

    let mut converted = Vec::with_capacity(pairs.len());
    for (name, content) in pairs {
        converted.push(KeyValue::new(name.clone(), content.clone()));
    }
    converted
}
/// Variant of the attribute conversion that re-validates UTF-8 before
/// emitting each entry.
///
/// Entries are dropped when the key or value is empty, or when the bytes of
/// either one fail `std::str::from_utf8`. The remaining entries are returned
/// as `KeyValue`s ordered by key, then by value.
fn utf8_safe_ordered_proto_attributes(attributes: &HashMap<String, String>) -> Vec<KeyValue> {
    let mut ordered: Vec<_> = attributes.iter().collect();
    ordered.sort_unstable_by(|(left_key, left_value), (right_key, right_value)| {
        left_key
            .cmp(right_key)
            .then_with(|| left_value.cmp(right_value))
    });
    ordered
        .into_iter()
        .filter(|(key, value)| !key.is_empty() && !value.is_empty())
        .filter(|(key, value)| {
            // Re-check the raw bytes. Counting `chars()` would only tell us
            // the string is non-empty; it does not validate the encoding.
            std::str::from_utf8(key.as_bytes()).is_ok()
                && std::str::from_utf8(value.as_bytes()).is_ok()
        })
        .map(|(key, value)| KeyValue::new(key.clone(), value.clone()))
        .collect()
}
/// Builds a `String` holding the bytes of `Hello`, a lone `0xFF`, and `World`.
///
/// The byte `0xFF` cannot occur in well-formed UTF-8, so the returned value
/// is intentionally malformed.
///
/// # Safety
///
/// The returned `String` violates the UTF-8 invariant that the standard
/// library assumes for every `String`. The caller must restrict itself to
/// operations that treat the contents as raw bytes and must not hand the
/// value to code that relies on it being valid UTF-8.
// `unsafe_code` is allowed here because skipping validation is the whole
// purpose of this fixture.
#[allow(unsafe_code)]
unsafe fn create_invalid_utf8_string() -> String {
    let invalid_bytes = vec![
        0x48, 0x65, 0x6c, 0x6c, 0x6f, 0xff, 0x57, 0x6f, 0x72, 0x6c, 0x64,
    ];
    // SAFETY: the UTF-8 precondition of `from_utf8_unchecked` is deliberately
    // not met; responsibility for handling the result is passed to the
    // caller through this function's `# Safety` contract.
    unsafe { String::from_utf8_unchecked(invalid_bytes) }
}
/// Returns `Hello`, the Unicode replacement character U+FFFD, then `World`.
fn create_utf8_with_replacement_chars() -> String {
    let mut text = String::from("Hello");
    text.push('\u{FFFD}');
    text.push_str("World");
    text
}
/// Runs `current_ordered_proto_attributes` over valid and edge-case attribute
/// maps, prints an audit report to stderr, and asserts on the result.
///
/// Beyond the entry counts, the test checks that the output is ordered by key
/// and that the entry with an empty value is dropped.
#[test]
fn otlp_utf8_validation_audit() {
    eprintln!("\n🔍 OTLP SPAN ATTRIBUTE UTF-8 VALIDATION AUDIT");
    eprintln!("==============================================");
    eprintln!("\n📋 Protobuf UTF-8 Requirements:");
    eprintln!(" • String fields MUST contain valid UTF-8 sequences");
    eprintln!(" • Invalid UTF-8 violates protobuf wire format specification");
    eprintln!(" • Collectors expect well-formed UTF-8 in string attributes");
    eprintln!(" • Invalid UTF-8 can cause parsing failures or data corruption");

    // Well-formed attributes, including multi-byte scalars.
    let mut valid_attributes = HashMap::new();
    valid_attributes.insert("service.name".to_string(), "my-service".to_string());
    valid_attributes.insert("unicode_test".to_string(), "Hello 世界 🌍".to_string());
    valid_attributes.insert("emoji".to_string(), "🦀💨".to_string());
    eprintln!("\n📊 Valid UTF-8 attributes:");
    for (key, value) in &valid_attributes {
        eprintln!(" '{}' = '{}'", key, value);
    }
    let valid_result = current_ordered_proto_attributes(&valid_attributes);
    eprintln!(
        "\nValid UTF-8 serialization: {} attributes",
        valid_result.len()
    );
    for attr in &valid_result {
        eprintln!(" '{}' = '{}'", attr.key, attr.value);
    }

    // Edge cases: an empty value, control characters, and the highest
    // Unicode scalar value.
    let mut test_attributes = HashMap::new();
    test_attributes.insert("normal".to_string(), "normal_value".to_string());
    test_attributes.insert("empty_after_filter".to_string(), String::new());
    test_attributes.insert(
        "control_chars".to_string(),
        "Hello\x00\x01\x1fWorld".to_string(),
    );
    test_attributes.insert("high_unicode".to_string(), "\u{10FFFF}".to_string());
    eprintln!("\n📋 Edge case attributes:");
    for (key, value) in &test_attributes {
        eprintln!(" '{}' = '{:?}'", key, value);
    }
    let edge_result = current_ordered_proto_attributes(&test_attributes);
    eprintln!(
        "\nEdge case serialization: {} attributes",
        edge_result.len()
    );
    for attr in &edge_result {
        eprintln!(" '{}' = '{:?}'", attr.key, attr.value);
    }

    eprintln!("\n🎯 UTF-8 ANALYSIS:");
    eprintln!(" Rust String type guarantee: ✅ ENFORCED");
    eprintln!(" • String constructor validates UTF-8 at creation time");
    eprintln!(" • Safe Rust cannot create String with invalid UTF-8");
    eprintln!(" • Type system prevents UTF-8 violations in practice");
    eprintln!(" Unsafe UTF-8 injection: ⚠️ THEORETICAL RISK");
    eprintln!(" • Unsafe code COULD create String with invalid UTF-8");
    eprintln!(" • Current implementation trusts String type guarantee");
    eprintln!(" • No explicit UTF-8 validation before protobuf serialization");

    assert_eq!(
        valid_result.len(),
        3,
        "All valid UTF-8 attributes should be serialized"
    );
    // The map iterates in arbitrary order, so a fixed key sequence here shows
    // the conversion really sorts its output.
    let valid_keys: Vec<&str> = valid_result.iter().map(|attr| attr.key.as_str()).collect();
    assert_eq!(
        valid_keys,
        ["emoji", "service.name", "unicode_test"],
        "Valid attributes should be emitted in key order"
    );

    assert_eq!(
        edge_result.len(),
        3,
        "Non-empty edge case attributes should be serialized"
    );
    let edge_keys: Vec<&str> = edge_result.iter().map(|attr| attr.key.as_str()).collect();
    assert_eq!(
        edge_keys,
        ["control_chars", "high_unicode", "normal"],
        "Edge case attributes should be emitted in key order without the empty-valued entry"
    );

    eprintln!("\n🚨 AUDIT FINDINGS:");
    eprintln!("==================");
    eprintln!("✅ SOUND: Current implementation relies on Rust String type safety");
    eprintln!(" • String type guarantees UTF-8 validity at construction");
    eprintln!(" • Safe Rust prevents creation of invalid UTF-8 strings");
    eprintln!(" • Type system provides the UTF-8 validation");
    eprintln!();
    eprintln!("⚠️ THEORETICAL CONCERN: Unsafe code bypass");
    eprintln!(" • Unsafe code could violate String UTF-8 guarantee");
    eprintln!(" • No defensive validation before protobuf serialization");
    eprintln!(" • Risk level: LOW (requires unsafe code, violates API contract)");
    eprintln!();
    eprintln!("🔒 DEFENSE IN DEPTH RECOMMENDATION:");
    eprintln!(" • Add defensive UTF-8 validation in protobuf conversion");
    eprintln!(" • Use str::chars() iteration to verify valid Unicode");
    eprintln!(" • Log warning and reject attributes with invalid UTF-8");
}
/// Checks that `String::from_utf8` accepts well-formed bytes and rejects
/// malformed ones, and that a Unicode-heavy string yields a non-zero
/// `chars()` count. A short report is written to stderr along the way.
#[test]
fn string_type_utf8_guarantee_verification() {
    let header = [
        "\n🔒 RUST STRING TYPE UTF-8 GUARANTEE VERIFICATION",
        "=================================================",
        "📋 Rust String UTF-8 Safety Mechanisms:",
        " • String::from_utf8() validates and returns Result<String, FromUtf8Error>",
        " • str literals are validated at compile time",
        " • String::new() creates empty valid UTF-8 string",
        " • String push operations maintain UTF-8 invariant",
    ];
    for line in header.iter() {
        eprintln!("{}", line);
    }

    // Round-trip well-formed bytes through the validating constructor.
    let well_formed: Vec<u8> = String::from("Hello, 世界! 🦀").into_bytes();
    let accepted = String::from_utf8(well_formed);
    assert!(accepted.is_ok(), "Valid UTF-8 should parse successfully");

    // These three bytes do not form a valid UTF-8 sequence.
    let malformed: Vec<u8> = vec![0xff, 0xfe, 0xfd];
    let rejected = String::from_utf8(malformed);
    assert!(rejected.is_err(), "Invalid UTF-8 should be rejected");

    let results = [
        "\n✅ Verification Results:",
        " • Valid UTF-8 string creation: PASS",
        " • Invalid UTF-8 rejection: PASS",
        " • Type system prevents UTF-8 violations in safe code",
    ];
    for line in results.iter() {
        eprintln!("{}", line);
    }

    let sample = String::from("Test with Unicode: 世界 🌍 \u{1F4A9}");
    let scalar_total = sample.chars().count();
    assert!(
        scalar_total > 0,
        "Valid UTF-8 should have countable characters"
    );
    eprintln!(" • Character iteration validation: {} chars", scalar_total);
    eprintln!(" • This could serve as defensive validation in protobuf conversion");
}
/// Prints a summary of the protobuf/OTLP UTF-8 requirements to stderr and
/// asserts that a sample attribute value passes an explicit UTF-8 check.
#[test]
fn protobuf_utf8_requirement_analysis() {
    eprintln!("\n📖 PROTOBUF UTF-8 REQUIREMENT ANALYSIS");
    eprintln!("======================================");
    eprintln!("📋 Protobuf Language Guide - Strings:");
    eprintln!(" • 'A string must always contain UTF-8 encoded text'");
    eprintln!(" • Invalid UTF-8 in string fields violates the protobuf specification");
    eprintln!(" • Parsers may reject messages with invalid UTF-8 in string fields");
    eprintln!(" • Wire format corruption can occur if invalid UTF-8 is transmitted");
    eprintln!("\n🎯 OTLP Compliance Analysis:");
    eprintln!(" • OTLP uses protobuf for wire format");
    eprintln!(" • Span attribute values are protobuf string fields");
    eprintln!(" • Invalid UTF-8 in attributes violates OTLP wire format");
    eprintln!(" • Collectors expect well-formed UTF-8 in all string fields");
    eprintln!("\n🔄 Current Implementation Assessment:");
    eprintln!(" • Relies on Rust String type UTF-8 guarantee");
    eprintln!(" • Assumption: input String contains valid UTF-8");
    eprintln!(" • No explicit validation before protobuf serialization");
    eprintln!(" • Risk: theoretical unsafe bypass of String invariant");
    eprintln!("\n💡 Defense-in-Depth Options:");
    eprintln!(" 1. Trust String type (current) - relies on type system");
    eprintln!(" 2. Validate with chars().count() - defensive check");
    eprintln!(" 3. Re-validate with str::from_utf8() - paranoid double-check");
    eprintln!(" 4. Sanitize with lossy conversion - replace invalid bytes");

    let test_attr = "Valid UTF-8 attribute value".to_string();
    // Validate the bytes themselves. A `chars().count() > 0` test would only
    // establish that the string is non-empty, and would wrongly fail for an
    // empty (yet perfectly valid) string.
    // NOTE(review): the report text still lists `chars().count()` as an
    // option; it is left untouched because it is runtime output.
    let is_valid_utf8 = std::str::from_utf8(test_attr.as_bytes()).is_ok();
    assert!(is_valid_utf8, "String should contain valid UTF-8");

    eprintln!("\n✅ Recommended Approach: Trust with verification");
    eprintln!(" • Current reliance on String type is sound for safe Rust");
    eprintln!(" • Add chars().count() > 0 check for defense-in-depth");
    eprintln!(" • Preserves performance while adding safety net");
}
/// Demonstrates a defensive attribute check and asserts that a set of
/// well-formed key/value pairs passes it. Progress is written to stderr.
#[test]
fn defensive_utf8_validation_example() {
    eprintln!("\n🛡️ DEFENSIVE UTF-8 VALIDATION EXAMPLE");
    eprintln!("=====================================");

    /// Returns `true` when both `key` and `value` are non-empty and their
    /// bytes pass `std::str::from_utf8`.
    ///
    /// Empty input is rejected silently; an encoding failure additionally
    /// logs a warning to stderr.
    fn validate_utf8_attribute(key: &str, value: &str) -> bool {
        if key.is_empty() || value.is_empty() {
            return false;
        }
        // Check the bytes directly instead of wrapping `chars()` in
        // `catch_unwind`: iterating characters is not a validation step, so
        // a panic-based guard around it would never report anything.
        let well_formed = std::str::from_utf8(key.as_bytes()).is_ok()
            && std::str::from_utf8(value.as_bytes()).is_ok();
        if !well_formed {
            eprintln!("WARNING: Invalid UTF-8 detected in span attribute");
        }
        well_formed
    }

    // Fixed-size table, so a plain array is enough.
    let valid_pairs = [
        ("service.name", "my-service"),
        ("unicode", "世界"),
        ("emoji", "🦀🌍"),
        ("control", "line1\nline2"),
    ];
    eprintln!("Testing UTF-8 validation on valid attributes:");
    for &(key, value) in valid_pairs.iter() {
        let is_valid = validate_utf8_attribute(key, value);
        eprintln!(
            " '{}' = '{}' → {}",
            key,
            value,
            if is_valid { "✅ VALID" } else { "❌ INVALID" }
        );
        assert!(is_valid, "Valid UTF-8 should pass validation");
    }

    eprintln!("\n💡 Implementation Recommendation:");
    eprintln!(" • Add UTF-8 validation to key_value() function");
    eprintln!(" • Use defensive chars().count() check");
    eprintln!(" • Log warnings for any validation failures");
    eprintln!(" • Reject attributes that fail validation");
}
}