use quick_xml::events::{BytesStart, Event};
use quick_xml::reader::Reader;
use quick_xml::writer::Writer;
use serde_json::Value as JsonValue;
use std::io::Cursor;
use thiserror::Error;
/// The document formats this module can detect and pretty-print.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
/// JSON text.
Json,
/// XML markup.
Xml,
/// SQL statements.
Sql,
}
/// Errors reported while detecting, parsing, or formatting a document.
#[derive(Debug, Error)]
pub enum ParseError {
/// The text is not valid JSON; wraps the underlying `serde_json` error
/// (convertible with `?` thanks to `#[from]`).
#[error("invalid JSON: {0}")]
InvalidJson(#[from] serde_json::Error),
/// XML could not be read or written; the payload is a human-readable description.
#[error("invalid XML: {0}")]
InvalidXml(String),
/// SQL could not be formatted; the payload is a human-readable description.
#[error("invalid SQL: {0}")]
InvalidSql(String),
/// Format auto-detection could not classify the text as JSON, XML, or SQL.
#[error("unknown format: the text does not look like valid JSON, XML, or SQL")]
UnknownFormat,
/// The input is empty or contains only whitespace.
#[error("the document is empty")]
EmptyInput,
/// The input is longer than the allowed limit; both fields are byte counts.
#[error("document exceeds the {limit}-byte limit (is {actual} bytes)")]
InputTooLarge { limit: usize, actual: usize },
}
/// Largest input, in bytes, that the parsing and formatting entry points accept (10 MiB).
pub const MAX_INPUT_SIZE: usize = 10 * 1024 * 1024;
/// An XML element: its tag name, its attributes, and its child content.
#[derive(Debug, Clone, PartialEq)]
pub struct XmlNode {
/// Element name as it appears in the start tag.
pub tag: String,
/// `(name, value)` pairs, in the order the XML reader yields them.
pub attributes: Vec<(String, String)>,
/// Text runs and nested elements, in the order they were encountered.
pub children: Vec<XmlChild>,
}
/// One piece of content inside an [`XmlNode`].
#[derive(Debug, Clone, PartialEq)]
pub enum XmlChild {
/// A run of character data.
Text(String),
/// A nested element.
Node(XmlNode),
}
// Upper-case statement keywords used by format auto-detection: when the first
// word of the input (after leading whitespace and SQL comments), upper-cased,
// equals one of these entries, the input is classified as SQL.
const SQL_DETECT_KEYWORDS: &[&str] = &[
"SELECT", "INSERT", "UPDATE", "DELETE", "CREATE", "DROP", "ALTER",
"WITH", "TRUNCATE", "MERGE", "REPLACE", "EXPLAIN", "CALL", "BEGIN",
"COMMIT", "ROLLBACK",
"DECLARE", "SET", "EXEC", "EXECUTE", "USE", "IF", "WHILE", "PRINT",
"GO", "GRANT", "REVOKE", "PRAGMA",
];
/// Guesses whether `input` is JSON, XML, or SQL.
///
/// Leading and trailing whitespace is ignored. The first remaining byte
/// decides JSON (`{` or `[`) or XML (`<`). Otherwise the input is SQL when
/// its first word is a known statement keyword or when the clause-word
/// heuristic matches.
///
/// # Errors
///
/// * [`ParseError::EmptyInput`] when nothing but whitespace was supplied.
/// * [`ParseError::UnknownFormat`] when none of the checks match.
pub fn auto_detect_format(input: &str) -> Result<Format, ParseError> {
    let text = input.trim();
    let lead = match text.bytes().next() {
        Some(byte) => byte,
        None => return Err(ParseError::EmptyInput),
    };

    if lead == b'{' || lead == b'[' {
        return Ok(Format::Json);
    }
    if lead == b'<' {
        return Ok(Format::Xml);
    }

    // The cheap keyword lookup runs first; the clause heuristic is only
    // consulted when the leading word is not a known statement keyword.
    let keyword = first_sql_keyword(text).to_uppercase();
    let is_sql =
        SQL_DETECT_KEYWORDS.iter().any(|&known| known == keyword) || looks_like_sql(text);

    if is_sql {
        Ok(Format::Sql)
    } else {
        Err(ParseError::UnknownFormat)
    }
}
/// Heuristically decides whether `input` reads like SQL.
///
/// Only the first 1024 bytes (rounded down to a character boundary) are
/// examined. The sample is upper-cased and searched for a fixed list of clause
/// words; the input counts as SQL when at least two list entries occur.
/// Entries are matched as plain substrings and counted independently, so
/// overlapping entries such as `JOIN` and `INNER JOIN` each contribute a hit.
fn looks_like_sql(input: &str) -> bool {
    const SAMPLE_BYTES: usize = 1024;
    const CLAUSE_WORDS: &[&str] = &[
        "SELECT", "FROM", "WHERE", "JOIN", "GROUP BY", "ORDER BY", "HAVING",
        "INSERT", "UPDATE", "DELETE", "CREATE", "ALTER", "DROP", "WITH",
        "INNER JOIN", "LEFT JOIN", "RIGHT JOIN", "UNION", "SET ", "VALUES",
    ];

    // Slicing a `str` in the middle of a multi-byte character panics, so back
    // off to the nearest boundary at or below the byte limit. Offset 0 is
    // always a boundary, which guarantees the loop terminates.
    let mut end = input.len().min(SAMPLE_BYTES);
    while !input.is_char_boundary(end) {
        end -= 1;
    }

    let sample = input[..end].to_uppercase();
    let hits = CLAUSE_WORDS
        .iter()
        .filter(|&&word| sample.contains(word))
        .count();
    hits >= 2
}
/// Returns the first word of `input` after skipping leading whitespace,
/// `-- line` comments, and `/* block */` comments.
///
/// The word is the run of alphabetic characters starting at the first
/// significant byte, returned with its original casing. An empty string is
/// returned when the input is exhausted, when the first significant byte is
/// not an ASCII letter, or when a block comment is never closed (everything
/// after the opener is then comment text, so no keyword exists).
fn first_sql_keyword(input: &str) -> String {
    let bytes = input.as_bytes();
    let mut i = 0;
    loop {
        while i < bytes.len() && matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r') {
            i += 1;
        }
        let rest = &bytes[i..];

        if rest.starts_with(b"--") {
            // Stop on the newline itself; the whitespace skip above consumes it.
            i += rest
                .iter()
                .position(|&byte| byte == b'\n')
                .unwrap_or(rest.len());
            continue;
        }

        if rest.starts_with(b"/*") {
            // Look for the terminator strictly after the two-byte opener so
            // that "/*/" is not mistaken for a complete comment.
            match rest[2..].windows(2).position(|pair| pair == b"*/") {
                Some(offset) => i += 2 + offset + 2,
                None => return String::new(),
            }
            continue;
        }

        return match rest.first() {
            // An ASCII byte always starts a character, so `i` is a valid
            // boundary for slicing the original `str`.
            Some(byte) if byte.is_ascii_alphabetic() => input[i..]
                .chars()
                .take_while(|c| c.is_alphabetic())
                .collect(),
            _ => String::new(),
        };
    }
}
pub fn parse_json(input: &str) -> Result<JsonValue, ParseError> {
validate_size(input)?;
let value = serde_json::from_str(input)?;
Ok(value)
}
pub fn parse_xml(input: &str) -> Result<XmlNode, ParseError> {
validate_size(input)?;
let trimmed = input.trim();
if trimmed.is_empty() {
return Err(ParseError::EmptyInput);
}
let mut reader = Reader::from_str(trimmed);
reader.config_mut().trim_text_start = true;
reader.config_mut().trim_text_end = true;
let mut stack: Vec<XmlNode> = Vec::new();
let mut root: Option<XmlNode> = None;
loop {
match reader.read_event() {
Ok(Event::Start(ref e)) => {
let node = build_node_from_start(e, &reader)?;
stack.push(node);
}
Ok(Event::Empty(ref e)) => {
let node = build_node_from_start(e, &reader)?;
if let Some(parent) = stack.last_mut() {
parent.children.push(XmlChild::Node(node));
} else {
root = Some(node);
}
}
Ok(Event::Text(ref e)) => {
let text = e
.unescape()
.map_err(|err| ParseError::InvalidXml(format!("Error en texto: {err}")))?
.to_string();
if !text.trim().is_empty() {
if let Some(parent) = stack.last_mut() {
parent.children.push(XmlChild::Text(text));
}
}
}
Ok(Event::End(_)) => {
let finished = stack.pop().ok_or_else(|| {
ParseError::InvalidXml("Etiqueta de cierre sin apertura".into())
})?;
if let Some(parent) = stack.last_mut() {
parent.children.push(XmlChild::Node(finished));
} else {
root = Some(finished);
}
}
Ok(Event::Eof) => break,
Ok(Event::Decl(_) | Event::Comment(_) | Event::CData(_) | Event::PI(_)) => {}
Err(e) => {
return Err(ParseError::InvalidXml(format!(
"Error en posición {}: {e}",
reader.error_position()
)));
}
_ => {}
}
}
root.ok_or_else(|| ParseError::InvalidXml("No se encontró un nodo raíz".into()))
}
/// Accepts `input` as SQL and returns it with surrounding whitespace removed.
///
/// No SQL syntax checking is performed here.
///
/// # Errors
///
/// * [`ParseError::InputTooLarge`] when the input exceeds [`MAX_INPUT_SIZE`].
/// * [`ParseError::EmptyInput`] when the input is empty or whitespace only.
pub fn parse_sql(input: &str) -> Result<String, ParseError> {
    validate_size(input)?;
    match input.trim() {
        "" => Err(ParseError::EmptyInput),
        statement => Ok(statement.to_owned()),
    }
}
/// Pretty-prints `input`, interpreting it as the given format.
///
/// JSON is parsed and re-serialised with `serde_json`'s pretty printer; XML
/// and SQL are delegated to their dedicated formatters.
///
/// # Errors
///
/// * [`ParseError::InputTooLarge`] when the input exceeds [`MAX_INPUT_SIZE`].
/// * [`ParseError::InvalidJson`] when `fmt` is JSON and the text does not
///   parse or cannot be re-serialised.
/// * Whatever the XML or SQL formatter reports for those formats.
pub fn format_pretty(input: &str, fmt: Format) -> Result<String, ParseError> {
    validate_size(input)?;
    match fmt {
        Format::Xml => pretty_print_xml(input),
        Format::Sql => format_sql_pretty(input),
        Format::Json => {
            let parsed = serde_json::from_str::<JsonValue>(input)?;
            serde_json::to_string_pretty(&parsed).map_err(ParseError::InvalidJson)
        }
    }
}
/// Checks that `input` is no longer than [`MAX_INPUT_SIZE`] bytes.
///
/// # Errors
///
/// Returns [`ParseError::InputTooLarge`], carrying the limit and the actual
/// byte length, when the input is over the limit.
fn validate_size(input: &str) -> Result<(), ParseError> {
    let actual = input.len();
    if actual <= MAX_INPUT_SIZE {
        Ok(())
    } else {
        Err(ParseError::InputTooLarge {
            limit: MAX_INPUT_SIZE,
            actual,
        })
    }
}
fn build_node_from_start(e: &BytesStart, reader: &Reader<&[u8]>) -> Result<XmlNode, ParseError> {
let tag = reader
.decoder()
.decode(e.name().as_ref())
.map_err(|err| ParseError::InvalidXml(format!("Error decodificando tag: {err}")))?
.to_string();
let mut attributes = Vec::new();
for attr_result in e.attributes() {
let attr = attr_result
.map_err(|err| ParseError::InvalidXml(format!("Error en atributo: {err}")))?;
let key = reader
.decoder()
.decode(attr.key.as_ref())
.map_err(|err| ParseError::InvalidXml(format!("Error en clave de atributo: {err}")))?
.to_string();
let value = attr
.unescape_value()
.map_err(|err| ParseError::InvalidXml(format!("Error en valor de atributo: {err}")))?
.to_string();
attributes.push((key, value));
}
Ok(XmlNode {
tag,
attributes,
children: Vec::new(),
})
}
/// Formats SQL with `sqlformat`: two-space indent, upper-cased keywords, and
/// one line between queries.
///
/// # Errors
///
/// Returns [`ParseError::InvalidSql`] when the formatter produces blank
/// output for non-blank input. Blank input yields the formatter's output
/// unchanged.
fn format_sql_pretty(input: &str) -> Result<String, ParseError> {
    use sqlformat::{FormatOptions, Indent, QueryParams};

    let output = sqlformat::format(
        input,
        &QueryParams::None,
        &FormatOptions {
            indent: Indent::Spaces(2),
            uppercase: Some(true),
            lines_between_queries: 1,
            ignore_case_convert: None,
        },
    );

    let lost_content = output.trim().is_empty() && !input.trim().is_empty();
    if lost_content {
        Err(ParseError::InvalidSql(
            "sqlformat returned empty output".into(),
        ))
    } else {
        Ok(output)
    }
}
fn pretty_print_xml(input: &str) -> Result<String, ParseError> {
let mut reader = Reader::from_str(input.trim());
reader.config_mut().trim_text_start = true;
reader.config_mut().trim_text_end = true;
let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 2);
loop {
match reader.read_event() {
Ok(Event::Start(e)) => {
writer.write_event(Event::Start(e)).map_err(|err| {
ParseError::InvalidXml(format!("Error escribiendo XML: {err}"))
})?;
}
Ok(Event::End(e)) => {
writer.write_event(Event::End(e)).map_err(|err| {
ParseError::InvalidXml(format!("Error escribiendo XML: {err}"))
})?;
}
Ok(Event::Empty(e)) => {
writer.write_event(Event::Empty(e)).map_err(|err| {
ParseError::InvalidXml(format!("Error escribiendo XML: {err}"))
})?;
}
Ok(Event::Text(e)) => {
writer.write_event(Event::Text(e)).map_err(|err| {
ParseError::InvalidXml(format!("Error escribiendo XML: {err}"))
})?;
}
Ok(Event::Eof) => break,
Ok(event) => {
writer.write_event(event).map_err(|err| {
ParseError::InvalidXml(format!("Error escribiendo XML: {err}"))
})?;
}
Err(e) => {
return Err(ParseError::InvalidXml(format!(
"Error leyendo XML en posición {}: {e}",
reader.error_position()
)));
}
}
}
let result = writer.into_inner().into_inner();
String::from_utf8(result)
.map_err(|err| ParseError::InvalidXml(format!("XML resultante no es UTF-8 válido: {err}")))
}
impl XmlNode {
    /// Returns the node's direct text children joined with single spaces.
    ///
    /// Text inside nested elements is not included. A node without text
    /// children yields an empty string.
    pub fn text_content(&self) -> String {
        let mut pieces: Vec<&str> = Vec::new();
        for child in &self.children {
            if let XmlChild::Text(text) = child {
                pieces.push(text.as_str());
            }
        }
        pieces.join(" ")
    }

    /// Returns the direct element children, in order, skipping text.
    pub fn child_nodes(&self) -> Vec<&XmlNode> {
        let mut nodes = Vec::new();
        for child in &self.children {
            if let XmlChild::Node(node) = child {
                nodes.push(node);
            }
        }
        nodes
    }

    /// Returns the first direct element child whose tag equals `tag`.
    pub fn find_child(&self, tag: &str) -> Option<&XmlNode> {
        self.children.iter().find_map(|child| match child {
            XmlChild::Node(node) if node.tag == tag => Some(node),
            _ => None,
        })
    }

    /// Returns the value of the first attribute named `name`, if any.
    pub fn get_attribute(&self, name: &str) -> Option<&str> {
        for (key, value) in &self.attributes {
            if key == name {
                return Some(value.as_str());
            }
        }
        None
    }
}
/// Renders the format as its upper-case label: `JSON`, `XML`, or `SQL`.
impl std::fmt::Display for Format {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Format::Json => "JSON",
            Format::Xml => "XML",
            Format::Sql => "SQL",
        };
        f.write_str(label)
    }
}
// Unit tests for format detection, the JSON/XML/SQL parsers, pretty-printing,
// the `XmlNode` helper methods, and the `Display` impl of `Format`.
#[cfg(test)]
mod tests {
use super::*;
// --- Format auto-detection: JSON and XML ---
#[test]
fn detecta_json_objeto() {
assert_eq!(auto_detect_format(r#"{"a": 1}"#).unwrap(), Format::Json);
}
#[test]
fn detecta_json_array() {
assert_eq!(auto_detect_format("[1, 2, 3]").unwrap(), Format::Json);
}
// Leading whitespace must not prevent detection.
#[test]
fn detecta_json_con_espacios() {
assert_eq!(auto_detect_format(" \n { }").unwrap(), Format::Json);
}
#[test]
fn detecta_xml() {
assert_eq!(auto_detect_format("<root/>").unwrap(), Format::Xml);
}
// An XML declaration also starts with `<`, so it is detected as XML.
#[test]
fn detecta_xml_con_declaracion() {
let xml = r#"<?xml version="1.0"?><root/>"#;
assert_eq!(auto_detect_format(xml).unwrap(), Format::Xml);
}
// Plain prose matches none of the formats.
#[test]
fn error_formato_desconocido() {
assert!(matches!(
auto_detect_format("hola mundo"),
Err(ParseError::UnknownFormat)
));
}
// Empty and whitespace-only input are both reported as empty.
#[test]
fn error_entrada_vacia() {
assert!(matches!(
auto_detect_format(""),
Err(ParseError::EmptyInput)
));
assert!(matches!(
auto_detect_format(" \n\t "),
Err(ParseError::EmptyInput)
));
}
// --- JSON parsing ---
#[test]
fn parsea_json_simple() {
let json = r#"{"nombre": "Juan", "edad": 30}"#;
let value = parse_json(json).unwrap();
assert_eq!(value["nombre"], "Juan");
assert_eq!(value["edad"], 30);
}
// Nested objects and arrays are reachable through indexing.
#[test]
fn parsea_json_anidado() {
let json = r#"{
"persona": {
"nombre": "Ana",
"hobbies": ["leer", "correr"]
}
}"#;
let value = parse_json(json).unwrap();
assert_eq!(value["persona"]["nombre"], "Ana");
assert_eq!(value["persona"]["hobbies"][0], "leer");
assert_eq!(value["persona"]["hobbies"][1], "correr");
}
// The root value may be an array with mixed element types.
#[test]
fn parsea_json_array_raiz() {
let json = r#"[1, "dos", null, true]"#;
let value = parse_json(json).unwrap();
assert!(value.is_array());
assert_eq!(value[0], 1);
assert_eq!(value[1], "dos");
assert!(value[2].is_null());
assert_eq!(value[3], true);
}
#[test]
fn error_json_invalido() {
let json = r#"{"nombre": }"#;
assert!(parse_json(json).is_err());
}
#[test]
fn error_json_vacio() {
assert!(parse_json("").is_err());
}
// --- XML parsing ---
#[test]
fn parsea_xml_simple() {
let xml = "<persona><nombre>Juan</nombre><edad>30</edad></persona>";
let node = parse_xml(xml).unwrap();
assert_eq!(node.tag, "persona");
assert_eq!(node.child_nodes().len(), 2);
assert_eq!(node.find_child("nombre").unwrap().text_content(), "Juan");
assert_eq!(node.find_child("edad").unwrap().text_content(), "30");
}
// Attributes are captured and non-ASCII text survives the round trip.
#[test]
fn parsea_xml_con_atributos() {
let xml = r#"<libro isbn="978-3-16" idioma="es"><titulo>Rust en Acción</titulo></libro>"#;
let node = parse_xml(xml).unwrap();
assert_eq!(node.tag, "libro");
assert_eq!(node.get_attribute("isbn"), Some("978-3-16"));
assert_eq!(node.get_attribute("idioma"), Some("es"));
assert_eq!(
node.find_child("titulo").unwrap().text_content(),
"Rust en Acción"
);
}
// Repeated sibling elements keep their order; whitespace between tags is ignored.
#[test]
fn parsea_xml_anidado() {
let xml = r#"
<biblioteca>
<libro>
<titulo>Don Quijote</titulo>
<autor>Cervantes</autor>
</libro>
<libro>
<titulo>Cien Años de Soledad</titulo>
<autor>García Márquez</autor>
</libro>
</biblioteca>
"#;
let node = parse_xml(xml).unwrap();
assert_eq!(node.tag, "biblioteca");
let libros = node.child_nodes();
assert_eq!(libros.len(), 2);
assert_eq!(
libros[0].find_child("titulo").unwrap().text_content(),
"Don Quijote"
);
assert_eq!(
libros[1].find_child("autor").unwrap().text_content(),
"García Márquez"
);
}
// A self-closing element becomes a child node without children of its own.
#[test]
fn parsea_xml_etiqueta_autocerrada() {
let xml = r#"<config><opcion activa="true"/></config>"#;
let node = parse_xml(xml).unwrap();
let opcion = node.find_child("opcion").unwrap();
assert_eq!(opcion.get_attribute("activa"), Some("true"));
assert!(opcion.children.is_empty());
}
// The XML declaration is skipped; the element after it is the root.
#[test]
fn parsea_xml_con_declaracion() {
let xml = r#"<?xml version="1.0" encoding="UTF-8"?><raiz>contenido</raiz>"#;
let node = parse_xml(xml).unwrap();
assert_eq!(node.tag, "raiz");
assert_eq!(node.text_content(), "contenido");
}
// An element that is never closed must be rejected.
#[test]
fn error_xml_invalido() {
let xml = "<abierto>sin cierre";
assert!(parse_xml(xml).is_err());
}
#[test]
fn error_xml_vacio() {
assert!(parse_xml("").is_err());
assert!(parse_xml(" ").is_err());
}
// --- Pretty-printing ---
// The output is multi-line and still parses to the same values.
#[test]
fn pretty_print_json() {
let json = r#"{"b":2,"a":1}"#;
let pretty = format_pretty(json, Format::Json).unwrap();
assert!(pretty.contains('\n'));
assert!(pretty.contains(" ")); let reparsed: JsonValue = serde_json::from_str(&pretty).unwrap();
assert_eq!(reparsed["a"], 1);
assert_eq!(reparsed["b"], 2);
}
// The pretty-printed XML is multi-line and parses back to the same tree.
#[test]
fn pretty_print_xml() {
let xml = "<root><a>1</a><b>2</b></root>";
let pretty = format_pretty(xml, Format::Xml).unwrap();
assert!(pretty.contains('\n'));
let node = parse_xml(&pretty).unwrap();
assert_eq!(node.tag, "root");
assert_eq!(node.find_child("a").unwrap().text_content(), "1");
}
// Formatting XML text as JSON must fail rather than guess.
#[test]
fn error_pretty_formato_incorrecto() {
assert!(format_pretty("<root/>", Format::Json).is_err());
}
// --- XmlNode helper methods ---
#[test]
fn xml_node_text_content_vacio() {
let node = XmlNode {
tag: "vacio".into(),
attributes: vec![],
children: vec![],
};
assert_eq!(node.text_content(), "");
}
#[test]
fn xml_node_find_child_inexistente() {
let node = XmlNode {
tag: "padre".into(),
attributes: vec![],
children: vec![XmlChild::Node(XmlNode {
tag: "hijo".into(),
attributes: vec![],
children: vec![],
})],
};
assert!(node.find_child("inexistente").is_none());
assert!(node.find_child("hijo").is_some());
}
#[test]
fn xml_node_get_attribute_inexistente() {
let node = XmlNode {
tag: "test".into(),
attributes: vec![("clave".into(), "valor".into())],
children: vec![],
};
assert_eq!(node.get_attribute("clave"), Some("valor"));
assert_eq!(node.get_attribute("otra"), None);
}
// --- SQL detection and parsing ---
#[test]
fn detecta_sql_select() {
assert_eq!(auto_detect_format("SELECT * FROM users").unwrap(), Format::Sql);
}
#[test]
fn detecta_sql_insert() {
assert_eq!(auto_detect_format("INSERT INTO t VALUES (1)").unwrap(), Format::Sql);
}
#[test]
fn detecta_sql_create() {
assert_eq!(auto_detect_format("CREATE TABLE foo (id INT)").unwrap(), Format::Sql);
}
#[test]
fn parsea_sql_simple() {
let sql = "SELECT id, name FROM users WHERE active = 1";
assert!(parse_sql(sql).is_ok());
}
// Empty and whitespace-only SQL are rejected.
#[test]
fn error_sql_invalido() {
assert!(parse_sql("").is_err());
assert!(parse_sql(" ").is_err());
}
// A leading `--` line comment must not hide the statement keyword.
#[test]
fn parsea_sql_con_comentario() {
let sql = "-- Get all active users\nSELECT * FROM users WHERE active = 1";
assert!(parse_sql(sql).is_ok());
assert_eq!(auto_detect_format(sql).unwrap(), Format::Sql);
}
// A leading `/* ... */` block comment must not hide the statement keyword.
#[test]
fn parsea_sql_con_comentario_bloque() {
let sql = "/* schema v2 */\nCREATE TABLE foo (id INT)";
assert_eq!(auto_detect_format(sql).unwrap(), Format::Sql);
}
// --- Display ---
#[test]
fn format_display() {
assert_eq!(format!("{}", Format::Json), "JSON");
assert_eq!(format!("{}", Format::Xml), "XML");
assert_eq!(format!("{}", Format::Sql), "SQL");
}
}