diet-xml 0.2.0

Probably the simplest, most approachable XML builder for Rust
Documentation

diet-xml

Crates.io Documentation License: MIT

Probably the simplest, most approachable XML builder for Rust

diet-xml

🔧 Use Case: Ideal for generating deeply hierarchical and grouped XML from sources like CSVs, APIs and database rows.

Aims to be a library you can just pick up and use within a few minutes. Dependency-free.

cargo add diet-xml
use diet_xml::XmlBuilder;

License

This project is licensed under the MIT License - see the LICENSE file for details.

Basic usage example

use diet_xml::XmlBuilder;

/// Builds an XML document for a small list of employees and prints it.
///
/// Walks through the core diet-xml workflow: declare a schema, key each
/// `employee` element by id, add leaf values, then build and print the result.
fn main() {


    // example data
    let employees = vec![
        Employee { id: 1, first: "John", last: "Doe", dob: "1900-01-01" },
        Employee { id: 2, first: "Jane", last: "Doe", dob: "1800-12-31" },
        Employee { id: 3, first: "John", last: "Dough", dob: "1700-01-01" },
     
    ];

    // Create an XmlBuilder: the struct that lets you build an XML output in memory.
    let mut xb = XmlBuilder::new();
    
    // Define the schema you wish to populate, in plain text.
    // `name!2` carries a `!suffix` so it can coexist with the `name` element
    // nested under `employee` (see the note on duplicate names further down).
    xb.set_schema("
    <root>
        <employee>
            <name>
                <first></first>
                <last></last>
            </name>
            <info>
                <dob></dob>
            </info>
        </employee>
        <passing_str_ok></passing_str_ok>
        <passing_i32_ok></passing_i32_ok>
        <name!2></name!2>   
    </root>");

    // The default header is <?xml version="1.0" encoding="UTF-8"?>.
    // To remove all headers, use xb.clear_header();
    // To add custom headers (multiple allowed), use xb.set_header("your custom header");
    // the enclosing < > are applied to the start and end automatically.

    for e in employees{

        // We want each id to relate to its own employee element, so the id is
        // used as the key. Chaining into .attribute() also displays the id as
        // an attribute on that element.
        //
        // set_key(), add_element() and attribute() each accept two arguments,
        // (&str, V), where V is any type that implements .to_string()
        // (e.g. String, &str, usize, u32). This lets values be added without
        // boilerplate to convert them to text first, so `e.id` is passed as-is.
        // For set_key() the arguments are (element_the_key_is_on, key_value).

        xb.set_key("employee", e.id)
            .attribute("id", e.id);

        // .attribute(attribute_name, attribute_value) can be chained from
        // either set_key() or add_element().
        // To add several attributes at once, chain .attributes() instead and
        // pass a slice of (name, value) pairs of &str:
        // &[(&str, &str), (&str, &str), ...]

        // Now simply add the element values we want populated.
        // There is no need to build the entire structure: unlike other
        // libraries, the parent elements are built implicitly.
        xb.add_element("first", e.first);
        xb.add_element("last", e.last);

        // Chain into the cdata() method to enclose an element's value in CDATA tags.
        xb.add_element("dob", e.dob).cdata();

        // clear_keys() resets all key context to its default value.
        // It isn't necessary in this sample program, because every key is
        // overwritten on each iteration; keys retain their state until they
        // are either overwritten or cleared.
        xb.clear_keys();       
   }


    // Passing any type that implements to_string() as the value for
    // add_element(), attribute() or set_key() is fine.
    xb.add_element("passing_str_ok", "some str");
    xb.add_element("passing_i32_ok", 111222333);

    // Duplicate element names must be distinguished by a !AnyAlphaNumText suffix
    // in the schema definition; the suffix is removed when the XML is produced.
    xb.add_element("name!2", "suffix !2 has been removed, this enables use of duplicate element names");

   // Builds the XML in the background.
   xb.build_xml();


    // xml_out() returns the XML output as a string.
    println!("{}", xb.xml_out());
}




/// Sample row used by the basic usage example; one value becomes one
/// `<employee>` element.
struct Employee {
    // Used both as the `employee` key and as its `id` attribute.
    id: usize,
    // Written to the `<first>` element.
    first: &'static str,
    // Written to the `<last>` element.
    last: &'static str,
    // Written to the `<dob>` element (wrapped in CDATA by the example).
    dob: &'static str,
}

See the examples below - and be generating XML in under 5 minutes.

Table of Contents

Version 0.2.0 added:

  • CDATA support for element values
  • Custom XML headers (add or clear headers easily)
  • Duplicate element name handling via schema suffixes
  • Any type implementing to_string() is accepted for element, key, and attribute values — no manual conversion needed
  • The library is now dependency-free (the fxhash dependency was removed)

⚠️ Experimental crate The API is still evolving. Expect changes in naming, behavior, and error handling.

Quick Start

cargo add diet-xml

use diet_xml::XmlBuilder;

API Overview

  • XmlBuilder::new() - Create a new XML builder
  • set_schema(schema) - Define the XML structure template
  • set_key(element, key) - Select which instance of a parent element to assign any added elements under
  • add_element(name, value) - Add content to an element
  • build_xml() - Generate the final XML
  • xml_out() - Get the resulting XML string

Examples

Basic Example

use diet_xml::XmlBuilder;

/// Minimal example: one schema, one leaf value, printed as XML.
fn main() {
    let mut xb: XmlBuilder = XmlBuilder::new();

    // Schemas are defined in plain text but must be well formed with matching
    // tags; this is validated by the library.
    let txt_schema = 
    "<root>
        <department>
            <employee>
                <name></name>
            </employee>
        </department>
    </root>";

    xb.set_schema(txt_schema); // set schema
    xb.add_element("name", "John Dough"); // add_element(element_name, value), here both passed as &str; "name" must be contained in the schema and must be a bottom-level element
    xb.build_xml();  // builds the XML string, which is then accessed via xb.xml_out()

    println!("{}", xb.xml_out());
}

Output:

<root>
  <department>
    <employee>
      <name>John Dough</name>
    </employee>
  </department>
</root>

Parent elements are implicitly built in diet-xml - we simply add them to the schema and they are automatically included.

Parent Elements

Parent elements are automatically included in the document - you simply add them to the schema definition and no extra coding is required to include them:

use diet_xml::XmlBuilder;

/// Shows that every ancestor declared in the schema is emitted automatically
/// when a single leaf value is added.
fn main() {
    let mut xb: XmlBuilder = XmlBuilder::new();

    // Schemas are defined in plain text but must be well formed with matching
    // tags; this is validated by the library.
    // g1..g6 are extra wrapper levels added purely to demonstrate deep nesting.
    let txt_schema = 
    "<root>
        <g1><g2><g3><g4><g5><g6>
            <department>
                <employee>
                    <name></name>
                </employee>
            </department>
        </g6></g5></g4></g3></g2></g1>
    </root>";

    xb.set_schema(txt_schema); // set schema
    // Only the leaf is added; no call is made for any of its parent elements.
    xb.add_element("name", "John Dough"); 
    xb.build_xml();

    println!("{}", xb.xml_out());
}

Output:

<root>
  <g1>
    <g2>
      <g3>
        <g4>
          <g5>
            <g6>
              <department>
                <employee>
                  <name>John Dough</name>
                </employee>
              </department>
            </g6>
          </g5>
        </g4>
      </g3>
    </g2>
  </g1>
</root>

Multiple Elements

This time we add multiple elements to the document:

use diet_xml::XmlBuilder;

/// Adds two values for the same leaf element without setting any key, so both
/// end up under the same parent elements.
fn main() {
    let mut xb: XmlBuilder = XmlBuilder::new();

    // Schemas are defined in plain text but must be well formed with matching
    // tags; this is validated by the library.
    let txt_schema = 
    "<root>
        <department>
            <employee>
                <name></name>
            </employee>
        </department>
    </root>";

    xb.set_schema(txt_schema); // set schema
    xb.add_element("name", "John Dough"); 
    xb.add_element("name", "Jane Dough"); // 2nd element added, no key set in between
    xb.build_xml();  

    println!("{}", xb.xml_out());
}

Output:

We see that two elements were added, but they appear side by side within the same parent element.

<root>
  <department>
    <employee>
      <name>John Dough</name>
      <name>Jane Dough</name>
    </employee>
  </department>
</root>

  • This is because the parents all have the same keys; as such, there is only one of each parent element.

  • If we want to place these elements in separate parent elements, we must use the set_key method.

  • This again takes text as arguments: you provide the element name (nm_element), which must be in the schema, and a key.
  • When you add an element after this, it will be grouped according to this key. Whenever no key has been assigned (or the keys have been cleared and not reassigned), everything is grouped together in the same default group, which appears first in the document order.

Using Keys

use diet_xml::XmlBuilder;

/// Uses `set_key` to place two names under two separate `<employee>` elements.
fn main() {
    let mut xb: XmlBuilder = XmlBuilder::new();

    // Schemas are defined in plain text but must be well formed with matching
    // tags; this is validated by the library.
    let txt_schema = 
    "<root>
        <department>
            <employee>
                <name></name>
            </employee>
        </department>
    </root>";

    xb.set_schema(txt_schema); // set schema
    xb.set_key("employee", "1"); // set key: following elements belong to employee "1"
    xb.add_element("name", "John Dough"); 
    xb.set_key("employee", "0"); // set to another key - forces creation of a new employee element
    xb.add_element("name", "Jane Dough"); // 2nd element added, under the new employee
    xb.build_xml();  

    println!("{}", xb.xml_out());
}

Output:

<root>
  <department>
    <employee>
      <name>John Dough</name>
    </employee>
    <employee>
      <name>Jane Dough</name>
    </employee>
  </department>
</root>

Attributes

Attributes can either be chained to a set_key call, to apply to a parent element before adding the next element, or be chained to add_element, to be displayed on the deepest element (the one you pass a value to). Chaining has been deliberately limited to .attribute() and .attributes() to prevent the generation of dense, unmaintainable code.

use diet_xml::XmlBuilder;

/// Demonstrates attributes on keyed parent elements (`employee`) and on a
/// leaf element (`name`).
fn main() {
    let mut xb: XmlBuilder = XmlBuilder::new();

    // Schemas are defined in plain text but must be well formed with matching
    // tags; this is validated by the library. The `name` element is therefore
    // written as an explicit open/close pair.
    let txt_schema =
"<root> <g1><g2><g3><g4><g5><g6>
    <department>
        <employee>
            <name></name>
        </employee>
    </department>
    </g6></g5></g4></g3></g2></g1>
</root>

";

    xb.set_schema(txt_schema); // set schema

    // attributes() takes a slice of (name, value) pairs and is chained from
    // set_key(), so the attributes land on the keyed `employee` element.
    xb.set_key("employee", "1").attributes(&[("id", "1"), ("initials", "JD")]);
    xb.add_element("name", "John Dough");
    xb.set_key("employee", "2").attributes(&[("id", "2"), ("initials", "JD")]);
    // attribute() is simpler to use when just one attribute is required; chained
    // from add_element() it is displayed on the leaf element itself.
    xb.add_element("name", "Jane Dough").attribute("CITY", "PARIS");

    xb.build_xml();

    println!("{}", xb.xml_out());
}
Output:

<root>
  <g1>
    <g2>
      <g3>
        <g4>
          <g5>
            <g6>
              <department>
                <employee id="1" initials="JD">
                  <name>John Dough</name>
                </employee>
                <employee id="2" initials="JD">
                  <name CITY="PARIS">Jane Dough</name>
                </employee>
              </department>
            </g6>
          </g5>
        </g4>
      </g3>
    </g2>
  </g1>
</root>

Clear Keys

Here we clear keys after setting them. This demonstrates how clearing keys resets grouping to the default state. When building complicated structures, it can be best to clear keys at the end of iterations, before moving onto the next:

use diet_xml::XmlBuilder;

/// Sets keys and immediately clears them, showing that cleared keys have no
/// effect on grouping.
fn main() {
    let mut xb: XmlBuilder = XmlBuilder::new();

    // Schemas are defined in plain text but must be well formed with matching
    // tags; this is validated by the library.
    let txt_schema = 
    "<root> 
        <department>
            <employee>
                <name></name>
            </employee>
        </department>
    </root>";
    
    xb.set_schema(txt_schema); // set schema
    xb.set_key("employee", "1");
    xb.clear_keys(); // clear keys: the key set on the previous line is ignored
    xb.add_element("name", "John Dough"); 
    xb.set_key("employee", "2");
    xb.clear_keys();  // clear keys: the key set on the previous line is ignored
    xb.add_element("name", "Jane Dough");

    xb.build_xml();
    println!("{}", xb.xml_out());
}

Output:

Here we see the keys were all ignored, due to the clear_keys() method being called:

<root>
  <department>
    <employee>
      <name>John Dough</name>
      <name>Jane Dough</name>
    </employee>
  </department>
</root>

Large Dataset

  • here we will use a csv stored on the DietXML github page to produce a larger xml

Comparison

Why this library exists

  • The diet-xml code shown first is roughly equivalent in function to the quick-xml/serde example that follows it
  • diet-xml can accomplish in a few dozen lines what other libraries require hundreds of lines to do

diet-xml version

use csv::Reader;
use std::io::Cursor;
use diet_xml::XmlBuilder;


/// Downloads the flights CSV, then emits one `<flight>` per row, grouped by
/// airline and year, using diet-xml keys.
fn main() {
   let url = "https://raw.githubusercontent.com/Developer-BT/DietXML/main/Rust/test_data/flights.csv";
   
   // Download CSV
   let csv_text = download_csv(url);
   
   // Parse CSV
   let csv_data = parse_csv(csv_text);
   
   // Show info
   println!("Headers: {:?}", csv_data.headers);
   println!("Got {} records", csv_data.records.len());

   let mut xb = XmlBuilder::new();
   // airline, year and flight are the grouping levels; only the four leaf
   // elements under airports/details receive values below.
   let txt_schema = 
"<root>
   <airlines>
       <airline>
           <year>
               <flight>
                   <airports>
                       <origin></origin>
                       <destination></destination>
                   </airports>
                   <details>
                       <time></time>
                       <air_time></air_time>
                   </details>
               </flight>
           </year>
       </airline>
   </airlines>
</root>";

   xb.set_schema(txt_schema);

   for r in csv_data.iter() { 
       // Get field values from the CSV record. Each unwrap() panics if the
       // named column is absent from the header row.
       let name = r.get_field("name").unwrap();
       let carrier = r.get_field("carrier").unwrap();
       let year = r.get_field("year").unwrap();
       let id = r.get_field("id").unwrap();
       let flight = r.get_field("flight").unwrap();
       let origin = r.get_field("origin").unwrap();
       let destination = r.get_field("dest").unwrap();
       let time= r.get_field("time_hour").unwrap();
       let air_time= r.get_field("air_time").unwrap();
       
       

       // add to xml

       // Set the keys for each row: rows sharing the same airline name, year
       // and id values are grouped under the same parent elements.
       xb.set_key("airline", name).attributes(&[("carrier",carrier),("name",name)]) ;
       xb.set_key("year", year).attribute("year",year) ;
       xb.set_key("flight", id).attributes(&[("id",id),("flight",flight)]) ;
       // add elements with values
       xb.add_element("origin", origin);
       xb.add_element("destination", destination);
       xb.add_element("time", time);
       xb.add_element("air_time", air_time);

       // Reset all keys so the next iteration starts from the default state.
       xb.clear_keys();
       




   }

   xb.build_xml();
   println!("{}", xb.xml_out());
}



////////////////////////////////////////////////////
//////////csv related functions below///////////////
////////////////////////////////////////////////////
/// Borrowed view of one CSV row together with the header row, so fields can
/// be looked up by column name.
struct CsvRecord<'a> {
   // The data row.
   record: &'a csv::StringRecord,
   // The header row; position i names the column at position i of `record`.
   headers: &'a csv::StringRecord,
}

impl<'a> CsvRecord<'a> {
   /// Returns the value in the column whose header equals `column_name`.
   ///
   /// The first matching header wins. Yields `None` when no header matches or
   /// when the row has no field at that position.
   fn get_field(&self, column_name: &str) -> Option<&str> {
      let index = self
         .headers
         .iter()
         .position(|header| header == column_name)?;
      self.record.get(index)
   }
}

/// A fully parsed CSV file: the header row plus every data row.
struct CsvData {
   // Column names, as read from the first line of the CSV.
   headers: csv::StringRecord,
   // All data rows, in file order.
   records: Vec<csv::StringRecord>,
}

impl CsvData {
   /// Iterates over the rows in file order, pairing each one with the shared
   /// header row so it can be queried by column name.
   fn iter(&self) -> impl Iterator<Item = CsvRecord> {
      let headers = &self.headers;
      self.records
         .iter()
         .map(move |record| CsvRecord { record, headers })
   }
}

/// Fetches `url` with a blocking HTTP GET and returns the response body as text.
///
/// # Panics
///
/// Panics if the request fails, if the server answers with an HTTP error
/// status (4xx/5xx), or if the body cannot be read as text. Checking the
/// status stops an error page from being handed on as if it were CSV.
fn download_csv(url: &str) -> String {
   reqwest::blocking::get(url)
      .and_then(|response| response.error_for_status())
      .and_then(|response| response.text())
      .unwrap_or_else(|err| panic!("failed to download CSV from {}: {}", url, err))
}

/// Parses CSV text into its header row and data rows.
///
/// # Panics
///
/// Panics if the header row or any data row cannot be read.
fn parse_csv(csv_text: String) -> CsvData {
   let mut reader = Reader::from_reader(Cursor::new(csv_text));

   // The header row is cloned because `headers()` only lends it out.
   let headers = reader.headers().unwrap().clone();

   let mut records = Vec::new();
   for row in reader.records() {
      records.push(row.unwrap());
   }

   CsvData { headers, records }
}

Output

...
    </airline>
    <airline carrier="F9" name="Frontier Airlines Inc.">
      <year year="2013">
        <flight id="145" flight="835">
          <airports>
            <origin>LGA</origin>
            <destination>DEN</destination>
          </airports>
          <details>
            <time>2013-01-01 08:00:00</time>
            <air_time>257.0</air_time>
          </details>
        </flight>
        <flight id="592" flight="511">
          <airports>
            <origin>LGA</origin>
            <destination>DEN</destination>
          </airports>
          <details>
            <time>2013-01-01 17:00:00</time>
            <air_time>242.0</air_time>
          </details>
        </flight>
        <flight id="1023" flight="835">
          <airports>
            <origin>LGA</origin>
            <destination>DEN</destination>
          </airports>
          <details>
            <time>2013-01-02 08:00:00</time>
            <air_time>239.0</air_time>
          </details>
        </flight>
        <flight id="1516" flight="511">
          <airports>
            <origin>LGA</origin>
            <destination>DEN</destination>
          </airports>
          <details>
            <time>2013-01-02 17:00:00</time>
            <air_time>238.0</air_time>
          </details>
        </flight>
        <flight id="1963" flight="835">
          <airports>
            <origin>LGA</origin>
            <destination>DEN</destination>
          </airports>
          <details>
            <time>2013-01-03 08:00:00</time>
            <air_time>219.0</air_time>
          </details>
        </flight>
      </year>
    </airline>
    <airline carrier="HA" name="Hawaiian Airlines Inc.">
      <year year="2013">
        <flight id="162" flight="51">
          <airports>
            <origin>JFK</origin>
            <destination>HNL</destination>
          </airports>
          <details>
            <time>2013-01-01 09:00:00</time>
            <air_time>659.0</air_time>
          </details>
        </flight>
        <flight id="1073" flight="51">
          <airports>
            <origin>JFK</origin>
            <destination>HNL</destination>
          </airports>
          <details>
            <time>2013-01-02 09:00:00</time>
            <air_time>638.0</air_time>
          </details>
        </flight>
        <flight id="2018" flight="51">
          <airports>
            <origin>JFK</origin>
            <destination>HNL</destination>
          </airports>
          <details>
            <time>2013-01-03 09:00:00</time>
            <air_time>616.0</air_time>
          </details>
        </flight>
      </year>
    </airline>
  </airlines>
</root>

quick-xml / serde version

Cargo.toml

# Dependencies for the comparison examples.
[dependencies]
serde = { version = "1.0", features = ["derive"] }
csv = "1.3.1"
# Use the published crate; a machine-specific local path cannot be resolved by readers.
diet-xml = "0.2.0"
quick-xml = { version = "0.38", features = ["serialize"] }
reqwest = { version = "0.12.22", features = ["blocking"] }

use csv::Reader;
use std::io::Cursor;
use serde::Serialize;
use quick_xml::se::to_string;
use std::collections::BTreeMap;

#[derive(Serialize)]
struct Root {
    airlines: Airlines,
}

/// Wrapper for the `<airlines>` element, holding every airline.
#[derive(Serialize)]
struct Airlines {
    // Renamed so each entry is written under the name `airline` rather than
    // the field name `airlines`.
    #[serde(rename = "airline")]
    airlines: Vec<Airline>,
}

/// One airline with its flights grouped by year.
#[derive(Serialize)]
struct Airline {
    // The `@` prefix is quick-xml's convention for writing a field as an XML
    // attribute rather than a child element.
    #[serde(rename = "@carrier")]
    carrier: String,
    #[serde(rename = "@name")]
    name: String,
    // Each entry is written under the name `year`.
    #[serde(rename = "year")]
    years: Vec<Year>,
}

/// All flights of one airline within one year.
#[derive(Serialize)]
struct Year {
    // Written as the `year` attribute (quick-xml `@` prefix convention).
    #[serde(rename = "@year")]
    year: String,
    // Each entry is written under the name `flight`.
    #[serde(rename = "flight")]
    flights: Vec<Flight>,
}

/// A single flight: two attributes plus the airports and details children.
#[derive(Serialize)]
struct Flight {
    // `id` and `flight` are written as attributes (quick-xml `@` prefix convention).
    #[serde(rename = "@id")]
    id: String,
    #[serde(rename = "@flight")]
    flight: String,
    // Child elements, serialized in declaration order.
    airports: Airports,
    details: Details,
}

/// Origin and destination of a flight, serialized under `airports`.
#[derive(Serialize)]
struct Airports {
    // Filled from the CSV `origin` column.
    origin: String,
    // Filled from the CSV `dest` column.
    destination: String,
}

/// Timing information of a flight, serialized under `details`.
#[derive(Serialize)]
struct Details {
    // Filled from the CSV `time_hour` column.
    time: String,
    // Filled from the CSV `air_time` column.
    air_time: String,
}

/// Groups the CSV rows by airline name and then by year, and converts the
/// grouping into the serde structs that are serialized to XML.
///
/// Airlines and years come out in ascending key order because both levels are
/// held in `BTreeMap`s. Within a year, flights are ordered by comparing their
/// `flight` values as strings (lexicographically, not numerically).
///
/// # Panics
///
/// The two `unwrap()` calls that pick an airline's carrier rely on every
/// group holding at least one flight, which the grouping loop guarantees.
fn build_structured_data(csv_data: CsvData) -> Root {
    // airline name -> year -> flights of that airline in that year
    let mut grouped: BTreeMap<String, BTreeMap<String, Vec<FlightData>>> = BTreeMap::new();

    for row in csv_data.iter() {
        // Reads one column as an owned String, substituting `fallback` when
        // the column is missing.
        let text = |column: &str, fallback: &str| {
            row.get_field(column).unwrap_or(fallback).to_string()
        };

        let entry = FlightData {
            carrier: text("carrier", ""),
            name: text("name", "Unknown"),
            year: text("year", "0"),
            id: text("id", ""),
            flight: text("flight", ""),
            origin: text("origin", ""),
            destination: text("dest", ""),
            time: text("time_hour", ""),
            air_time: text("air_time", ""),
        };

        grouped
            .entry(entry.name.clone())
            .or_default()
            .entry(entry.year.clone())
            .or_default()
            .push(entry);
    }

    // Order the flights inside every (airline, year) bucket by flight string.
    for bucket in grouped.values_mut().flat_map(|by_year| by_year.values_mut()) {
        bucket.sort_by(|left, right| left.flight.cmp(&right.flight));
    }

    // Turn the nested maps into the serializable tree.
    let mut airlines: Vec<Airline> = Vec::with_capacity(grouped.len());
    for (airline_name, by_year) in grouped {
        // The carrier code is taken from the first flight of the first year.
        let carrier = by_year
            .values()
            .next()
            .unwrap()
            .first()
            .unwrap()
            .carrier
            .clone();

        let mut years: Vec<Year> = Vec::with_capacity(by_year.len());
        for (year_value, bucket) in by_year {
            let flights: Vec<Flight> = bucket
                .into_iter()
                .map(|item| Flight {
                    id: item.id,
                    flight: item.flight,
                    airports: Airports {
                        origin: item.origin,
                        destination: item.destination,
                    },
                    details: Details {
                        time: item.time,
                        air_time: item.air_time,
                    },
                })
                .collect();

            years.push(Year {
                year: year_value,
                flights,
            });
        }

        airlines.push(Airline {
            carrier,
            name: airline_name,
            years,
        });
    }

    Root {
        airlines: Airlines { airlines },
    }
}

/// Flat, owned copy of one CSV row, used as the intermediate form while rows
/// are being grouped by airline and year.
#[derive(Clone)]
struct FlightData {
    carrier: String,
    // Airline name; the outer grouping key.
    name: String,
    // The inner grouping key.
    year: String,
    id: String,
    // Flights within a year are sorted by this value.
    flight: String,
    origin: String,
    // Read from the CSV `dest` column.
    destination: String,
    // Read from the CSV `time_hour` column.
    time: String,
    air_time: String,
}

/// Downloads the flights CSV, groups it into the serde model and prints the
/// XML produced by quick-xml.
fn main() {
    let csv_data = parse_csv(download_csv(
        "https://raw.githubusercontent.com/Developer-BT/DietXML/main/Rust/test_data/flights.csv",
    ));

    println!("Headers: {:?}", csv_data.headers);
    println!("Got {} records", csv_data.records.len());

    // Grouping and sorting happen while the serde tree is built; serialization
    // is then a single call.
    let root = build_structured_data(csv_data);
    println!("{}", to_string(&root).unwrap());
}

// ... rest of CSV handling code stays the same ...

/// Borrowed view of one CSV row together with the header row, so fields can
/// be looked up by column name.
struct CsvRecord<'a> {
    // The data row.
    record: &'a csv::StringRecord,
    // The header row; position i names the column at position i of `record`.
    headers: &'a csv::StringRecord,
}

impl<'a> CsvRecord<'a> {
    /// Returns the value in the column whose header equals `column_name`.
    ///
    /// The first matching header wins. Yields `None` when no header matches or
    /// when the row has no field at that position.
    fn get_field(&self, column_name: &str) -> Option<&str> {
        let index = self
            .headers
            .iter()
            .position(|header| header == column_name)?;
        self.record.get(index)
    }
}

/// A fully parsed CSV file: the header row plus every data row.
struct CsvData {
    // Column names, as read from the first line of the CSV.
    headers: csv::StringRecord,
    // All data rows, in file order.
    records: Vec<csv::StringRecord>,
}

impl CsvData {
    /// Iterates over the rows in file order, pairing each one with the shared
    /// header row so it can be queried by column name.
    fn iter(&self) -> impl Iterator<Item = CsvRecord> {
        let headers = &self.headers;
        self.records
            .iter()
            .map(move |record| CsvRecord { record, headers })
    }
}

/// Fetches `url` with a blocking HTTP GET and returns the response body as text.
///
/// # Panics
///
/// Panics if the request fails, if the server answers with an HTTP error
/// status (4xx/5xx), or if the body cannot be read as text. Checking the
/// status stops an error page from being handed on as if it were CSV.
fn download_csv(url: &str) -> String {
    reqwest::blocking::get(url)
        .and_then(|response| response.error_for_status())
        .and_then(|response| response.text())
        .unwrap_or_else(|err| panic!("failed to download CSV from {}: {}", url, err))
}

/// Parses CSV text into its header row and data rows.
///
/// # Panics
///
/// Panics if the header row or any data row cannot be read.
fn parse_csv(csv_text: String) -> CsvData {
    let mut reader = Reader::from_reader(Cursor::new(csv_text));

    // The header row is cloned because `headers()` only lends it out.
    let headers = reader.headers().unwrap().clone();

    let mut records = Vec::new();
    for row in reader.records() {
        records.push(row.unwrap());
    }

    CsvData { headers, records }
}