// herolib_otoml 0.3.13
//
// OTOML - Canonical TOML serialization format with compact binary representation.
// Documentation
//! Canonical OTOML text serialization and deserialization.
//!
//! This module provides the core functions for OTOML text format:
//! - `dump_otoml`: Serialize to canonical OTOML text format
//! - `load_otoml`: Deserialize from `O:`-prefixed text whose body is any valid TOML

use super::error::{OtomlError, Result};
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use toml::Value;

/// OTOML format prefix - identifies the format and serves as a validation check.
///
/// `dump_otoml` writes it at the very start of its output, and `load_otoml`
/// rejects any input that does not begin with exactly these bytes.
const OTOML_PREFIX: &str = "O:\n";

/// Serialize a value to canonical OTOML text string.
///
/// The output is guaranteed to be:
/// - Prefixed with "O:\n" to identify the format
/// - Valid TOML v1.0
/// - Deterministic (same input always produces same output)
/// - Keys sorted alphabetically
/// - Nested objects as inline tables
pub fn dump_otoml<T: Serialize>(value: &T) -> Result<String> {
    // First serialize to toml::Value to manipulate the structure
    let toml_value = toml::Value::try_from(value)?;

    // Convert to canonical form
    let canonical = to_canonical(&toml_value);

    // Serialize to string with OTOML prefix
    let mut output = String::from(OTOML_PREFIX);
    output.push_str(&serialize_canonical(&canonical, 0, false));
    Ok(output)
}

/// Parse OTOML text into a value of type `T`.
///
/// The input must begin with the exact `"O:\n"` prefix; everything after the
/// prefix is handed to the TOML parser unchanged.
///
/// # Errors
///
/// - `OtomlError::InvalidFormat` when the prefix is missing.
/// - The converted TOML parse error when the remaining text cannot be
///   deserialized into `T`.
pub fn load_otoml<T: for<'de> Deserialize<'de>>(s: &str) -> Result<T> {
    // The prefix doubles as a cheap format check before any parsing happens.
    let content = s.strip_prefix(OTOML_PREFIX).ok_or_else(|| {
        OtomlError::InvalidFormat("OTOML data must start with 'O:' prefix".to_string())
    })?;

    let parsed: T = toml::from_str(content)?;
    Ok(parsed)
}

/// Rewrite every table key of a TOML document in snake_case.
///
/// Accepts plain TOML (no `O:` prefix is expected or produced) whose keys may
/// use camelCase, kebab-case and similar styles, and returns the document
/// re-serialized in canonical form with the converted keys.
///
/// # Errors
///
/// Returns `OtomlError::ParseError` when `s` is not valid TOML.
pub fn normalize_keys(s: &str) -> Result<String> {
    let parsed = toml::from_str::<Value>(s).map_err(OtomlError::ParseError)?;
    Ok(serialize_canonical(&normalize_value(&parsed), 0, false))
}

/// Produce a deep copy of `value` in which every table is rebuilt from its
/// entries in sorted key order.
///
/// Arrays keep their element order (each element is converted recursively);
/// scalar values are cloned unchanged.
fn to_canonical(value: &Value) -> Value {
    match value {
        Value::Array(items) => Value::Array(items.iter().map(to_canonical).collect()),
        Value::Table(entries) => {
            // Route the entries through a BTreeMap so they come back out ordered by key.
            let ordered: BTreeMap<String, Value> = entries
                .iter()
                .map(|(name, child)| (name.clone(), to_canonical(child)))
                .collect();
            Value::Table(ordered.into_iter().collect())
        }
        scalar => scalar.clone(),
    }
}

/// Produce a deep copy of `value` with every table key converted to snake_case.
///
/// Arrays are converted element by element and scalars are cloned unchanged.
/// When two keys of the same table convert to the same name, the entry
/// visited later replaces the earlier one.
fn normalize_value(value: &Value) -> Value {
    match value {
        Value::Array(items) => Value::Array(items.iter().map(normalize_value).collect()),
        Value::Table(entries) => {
            // Explicit inserts keep the "later entry wins" behavior on key collisions.
            let renamed = entries.iter().fold(
                BTreeMap::<String, Value>::new(),
                |mut acc, (name, child)| {
                    acc.insert(to_snake_case(name), normalize_value(child));
                    acc
                },
            );
            Value::Table(renamed.into_iter().collect())
        }
        scalar => scalar.clone(),
    }
}

/// Convert an identifier to snake_case.
///
/// Rules, applied character by character:
/// - `-` and `_` become a single `_`; repeated separators collapse and
///   separators before the first emitted character are dropped.
/// - An ASCII uppercase letter is lowercased, and is preceded by `_` when the
///   character right before it in the input was an ASCII lowercase letter.
/// - Every other character is copied through unchanged.
fn to_snake_case(s: &str) -> String {
    let mut snake = String::with_capacity(s.len() + 4);
    // Previous *input* character; only a lowercase -> uppercase step opens a new word.
    let mut previous: Option<char> = None;

    for current in s.chars() {
        match current {
            '-' | '_' => {
                // Only this arm ever emits '_', so the output tail tells us
                // whether a separator was just written.
                if !snake.is_empty() && !snake.ends_with('_') {
                    snake.push('_');
                }
            }
            upper if upper.is_ascii_uppercase() => {
                if previous.map_or(false, |p| p.is_ascii_lowercase()) {
                    snake.push('_');
                }
                snake.push(upper.to_ascii_lowercase());
            }
            other => snake.push(other),
        }
        previous = Some(current);
    }

    snake
}

/// Serialize a value to canonical OTOML string.
///
/// # Parameters
///
/// - `value`: the value to render.
/// - `indent`: forwarded to `serialize_array`; not otherwise used here.
/// - `inline`: only consulted for tables. `true` renders the table as an
///   inline table (`{ a = 1 }`), `false` renders it with the root-table
///   layout (one `key = value` per line followed by `[[array]]` sections).
///
/// # Rendering rules
///
/// - Integers and booleans use their standard textual form.
/// - Floats with no fractional part are written with one decimal (`1.0`) so
///   they stay floats when parsed back; NaN is written as `nan`.
/// - Datetimes are written as a quoted string produced by
///   `normalize_datetime`.
fn serialize_canonical(value: &Value, indent: usize, inline: bool) -> String {
    match value {
        Value::Table(table) => {
            if inline {
                serialize_inline_table(table)
            } else {
                serialize_root_table(table)
            }
        }
        Value::Array(arr) => serialize_array(arr, indent),
        Value::String(s) => serialize_string(s),
        Value::Integer(i) => i.to_string(),
        Value::Float(f) => {
            if f.is_nan() {
                // Rust prints NaN as "NaN", but TOML only accepts lowercase `nan`.
                "nan".to_string()
            } else if f.fract() == 0.0 {
                format!("{:.1}", f)
            } else {
                // Infinities land here too (their fract() is NaN) and print as
                // `inf` / `-inf`, which is already valid TOML.
                f.to_string()
            }
        }
        Value::Boolean(b) => b.to_string(),
        Value::Datetime(dt) => format!("\"{}\"", normalize_datetime(&dt.to_string())),
    }
}

/// Serialize the document's top-level table.
///
/// The root has no enclosing path, so any `[[array]]` headers produced for
/// its entries start from an empty path.
fn serialize_root_table(table: &toml::map::Map<String, Value>) -> String {
    let root_path: [&str; 0] = [];
    serialize_table_with_path(table, &root_path)
}

/// Render a table key so it is always a valid TOML key.
///
/// Keys made only of ASCII letters, digits, `_` and `-` are written bare.
/// Anything else (empty keys, spaces, dots, quotes, non-ASCII, ...) is written
/// as a single-line quoted key with escapes, so that, for example, a key
/// containing `.` is not misread as a dotted path.
fn serialize_key(key: &str) -> String {
    let is_bare = !key.is_empty()
        && key
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-');
    if is_bare {
        return key.to_string();
    }

    let mut quoted = String::with_capacity(key.len() + 2);
    quoted.push('"');
    for ch in key.chars() {
        match ch {
            '"' => quoted.push_str("\\\""),
            '\\' => quoted.push_str("\\\\"),
            // Quoted keys must stay on one line, so newlines are always escaped.
            '\n' => quoted.push_str("\\n"),
            '\t' => quoted.push_str("\\t"),
            '\r' => quoted.push_str("\\r"),
            ch if ch.is_control() => quoted.push_str(&format!("\\u{:04X}", ch as u32)),
            _ => quoted.push(ch),
        }
    }
    quoted.push('"');
    quoted
}

/// Serialize a table with a given path prefix for nested array of tables.
///
/// Layout:
/// 1. Every entry that is not an array of tables is written as one
///    `key = value` line, in sorted key order. Nested tables are written as
///    inline tables.
/// 2. Every non-empty array whose elements are *all* tables is written as a
///    sequence of `[[path.key]]` sections, each followed by the recursive
///    rendering of that element. A blank line is inserted before a section
///    header when there is preceding output.
///
/// Arrays that mix tables with other values are written inline (step 1), so
/// that none of their elements are lost.
///
/// `path` holds the unquoted keys of the enclosing array-of-tables sections;
/// keys are quoted as needed when the header is written.
fn serialize_table_with_path(table: &toml::map::Map<String, Value>, path: &[&str]) -> String {
    let mut output = String::new();

    // Keys are unique, so an unstable sort yields a deterministic order.
    let mut entries: Vec<(&String, &Value)> = table.iter().collect();
    entries.sort_unstable_by(|a, b| a.0.cmp(b.0));

    // Arrays of tables must come after the plain key/value lines, otherwise
    // those lines would be parsed as belonging to the last `[[...]]` section.
    let mut array_of_tables: Vec<(&String, &Vec<Value>)> = Vec::new();

    for (key, value) in entries {
        if let Value::Array(arr) = value {
            let all_tables =
                !arr.is_empty() && arr.iter().all(|item| matches!(item, Value::Table(_)));
            if all_tables {
                array_of_tables.push((key, arr));
                continue;
            }
        }

        output.push_str(&serialize_key(key));
        output.push_str(" = ");

        // Nested tables become inline tables
        let is_table = matches!(value, Value::Table(_));
        output.push_str(&serialize_canonical(value, 0, is_table));
        output.push('\n');
    }

    for (key, arr) in array_of_tables {
        // Build the full path for this array of tables
        let mut full_path: Vec<&str> = path.to_vec();
        full_path.push(key);
        let header = full_path
            .iter()
            .map(|segment| serialize_key(segment))
            .collect::<Vec<_>>()
            .join(".");

        for item in arr {
            if !output.is_empty() && !output.ends_with("\n\n") {
                output.push('\n');
            }
            output.push_str("[[");
            output.push_str(&header);
            output.push_str("]]\n");
            if let Value::Table(t) = item {
                output.push_str(&serialize_table_with_path(t, &full_path));
            }
        }
    }

    output
}

/// Serialize an inline table.
///
/// Entries are written in sorted key order as `{ k1 = v1, k2 = v2 }`; nested
/// tables are themselves written inline. Keys that are not valid bare keys
/// are quoted.
fn serialize_inline_table(table: &toml::map::Map<String, Value>) -> String {
    let mut entries: Vec<(&String, &Value)> = table.iter().collect();
    entries.sort_unstable_by(|a, b| a.0.cmp(b.0));

    let mut output = String::from("{ ");
    for (i, (key, value)) in entries.into_iter().enumerate() {
        if i > 0 {
            output.push_str(", ");
        }
        output.push_str(&serialize_key(key));
        output.push_str(" = ");

        let is_table = matches!(value, Value::Table(_));
        output.push_str(&serialize_canonical(value, 0, is_table));
    }

    output.push_str(" }");
    output
}

/// Serialize an array on a single line as `[v1, v2, ...]`.
///
/// Table elements are rendered as inline tables; all other elements use
/// their canonical form. `_indent` is accepted for signature compatibility
/// and is not used.
fn serialize_array(arr: &[Value], _indent: usize) -> String {
    let rendered: Vec<String> = arr
        .iter()
        .map(|element| {
            let as_inline_table = matches!(element, Value::Table(_));
            serialize_canonical(element, 0, as_inline_table)
        })
        .collect();

    format!("[{}]", rendered.join(", "))
}

/// Serialize a string as a TOML basic string with proper escaping.
///
/// - Strings without a newline are written as a single-line `"..."` string
///   with `"`, `\`, tab, carriage return and other control characters escaped.
/// - Strings containing a newline are written as a multi-line `"""..."""`
///   string. Newlines and tabs are kept literal; `\`, carriage returns and
///   other control characters are escaped, and every third consecutive `"` is
///   escaped so the content can never contain the closing delimiter.
///
/// The closing delimiter is placed directly after the last character so no
/// trailing newline is added. Because TOML discards a newline that directly
/// follows the opening `"""`, an extra newline is emitted when the content
/// itself starts with one.
fn serialize_string(s: &str) -> String {
    let multiline = s.contains('\n');
    let delimiter = if multiline { "\"\"\"" } else { "\"" };

    let mut output = String::with_capacity(s.len() + 2 * delimiter.len());
    output.push_str(delimiter);

    // Sacrificial newline: the parser trims it and keeps the real one.
    if multiline && s.starts_with('\n') {
        output.push('\n');
    }

    // Number of unescaped `"` written back-to-back (multi-line mode only).
    let mut quote_run = 0;
    for ch in s.chars() {
        if ch == '"' {
            if multiline && quote_run < 2 {
                // One or two adjacent quotes are legal inside `"""..."""`.
                output.push('"');
                quote_run += 1;
            } else {
                output.push_str("\\\"");
                quote_run = 0;
            }
            continue;
        }
        quote_run = 0;

        match ch {
            '\\' => output.push_str("\\\\"),
            // A raw CR could be folded into a line ending by a parser; escape it.
            '\r' => output.push_str("\\r"),
            // Only reachable in multi-line mode.
            '\n' => output.push('\n'),
            '\t' if multiline => output.push('\t'),
            '\t' => output.push_str("\\t"),
            ch if ch.is_control() => {
                output.push_str(&format!("\\u{:04X}", ch as u32));
            }
            _ => output.push(ch),
        }
    }

    output.push_str(delimiter);
    output
}

/// Normalize a datetime to OTOML format "YYYY-MM-DD HH:MM:SS".
///
/// Accepts ISO 8601 / RFC 3339 style text such as `2025-09-17T14:32:08Z`:
/// - surrounding whitespace and a trailing `Z` are removed;
/// - the `T` separator becomes a single space;
/// - fractional seconds and any UTC offset (`+02:00`, `-05:00`) are dropped.
///   The wall-clock time is kept as written; it is *not* converted to UTC;
/// - a time without seconds gets `:00` appended.
///
/// Input without a `T` (for example a date-only value) is returned as is,
/// apart from the trimming described above.
fn normalize_datetime(dt: &str) -> String {
    // Handle ISO 8601 format: 2025-09-17T14:32:08Z
    let dt = dt.trim().trim_end_matches('Z');

    let (date, time) = match dt.split_once('T') {
        Some(parts) => parts,
        None => return dt.to_string(),
    };

    // Keep only HH:MM[:SS]. Cutting at the first '.', '+' or '-' removes
    // fractional seconds and offsets alike, whether or not both are present.
    let clock = time
        .split(|c: char| matches!(c, '.' | '+' | '-'))
        .next()
        .unwrap_or(time);

    // Ensure we have seconds
    let fields: Vec<&str> = clock.split(':').collect();
    let formatted_time = match fields.as_slice() {
        [hours, minutes, seconds, ..] => format!("{}:{}:{}", hours, minutes, seconds),
        [hours, minutes] => format!("{}:{}:00", hours, minutes),
        _ => "00:00:00".to_string(),
    };

    format!("{} {}", date, formatted_time)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Key styles handled by `to_snake_case`: camelCase, PascalCase,
    /// kebab-case, existing snake_case and all-caps.
    #[test]
    fn test_to_snake_case() {
        let cases = [
            ("myKey", "my_key"),
            ("AnotherKey", "another_key"),
            ("kebab-key", "kebab_key"),
            ("already_snake", "already_snake"),
            ("UPPERCASE", "uppercase"),
        ];

        for (input, expected) in cases.iter() {
            assert_eq!(to_snake_case(input), *expected, "input: {:?}", input);
        }
    }

    /// `Z` suffix, no suffix, and fractional seconds all normalize to the
    /// same "YYYY-MM-DD HH:MM:SS" text.
    #[test]
    fn test_normalize_datetime() {
        let expected = "2025-09-17 14:32:08";
        let inputs = [
            "2025-09-17T14:32:08Z",
            "2025-09-17T14:32:08",
            "2025-09-17T14:32:08.123Z",
        ];

        for input in inputs.iter() {
            assert_eq!(normalize_datetime(input), expected, "input: {:?}", input);
        }
    }
}