sheetkit-xml 0.5.1

Serde-based OOXML schema types for Excel (.xlsx) files
Documentation
//! Shared Strings XML schema structures.
//!
//! Represents `xl/sharedStrings.xml` in the OOXML package.

use serde::{Deserialize, Serialize};

use crate::namespaces;

/// Shared String Table root element (`xl/sharedStrings.xml`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename = "sst")]
pub struct Sst {
    #[serde(rename = "@xmlns")]
    pub xmlns: String,

    /// Total reference count of shared strings in the workbook.
    #[serde(rename = "@count", skip_serializing_if = "Option::is_none")]
    pub count: Option<u32>,

    /// Number of unique string entries.
    #[serde(rename = "@uniqueCount", skip_serializing_if = "Option::is_none")]
    pub unique_count: Option<u32>,

    /// Shared string items.
    #[serde(rename = "si", default)]
    pub items: Vec<Si>,
}

/// Shared String Item.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Si {
    /// Plain text content.
    #[serde(rename = "t", skip_serializing_if = "Option::is_none")]
    pub t: Option<T>,

    /// Rich text runs (formatted text).
    #[serde(rename = "r", default)]
    pub r: Vec<R>,
}

/// Text element with optional space preservation.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct T {
    #[serde(
        rename = "@xml:space",
        alias = "@space",
        skip_serializing_if = "Option::is_none"
    )]
    pub xml_space: Option<String>,

    #[serde(rename = "$value", default)]
    pub value: String,
}

/// Rich text run.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct R {
    /// Run properties (formatting).
    #[serde(rename = "rPr", skip_serializing_if = "Option::is_none")]
    pub r_pr: Option<RPr>,

    /// Text content.
    #[serde(rename = "t")]
    pub t: T,
}

/// Run properties (text formatting within a rich text run).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RPr {
    #[serde(rename = "b", skip_serializing_if = "Option::is_none")]
    pub b: Option<BoolVal>,

    #[serde(rename = "i", skip_serializing_if = "Option::is_none")]
    pub i: Option<BoolVal>,

    #[serde(rename = "sz", skip_serializing_if = "Option::is_none")]
    pub sz: Option<FontSize>,

    #[serde(rename = "color", skip_serializing_if = "Option::is_none")]
    pub color: Option<Color>,

    #[serde(rename = "rFont", skip_serializing_if = "Option::is_none")]
    pub r_font: Option<FontName>,

    #[serde(rename = "family", skip_serializing_if = "Option::is_none")]
    pub family: Option<FontFamily>,

    #[serde(rename = "scheme", skip_serializing_if = "Option::is_none")]
    pub scheme: Option<FontScheme>,
}

/// Boolean value wrapper.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BoolVal {
    #[serde(rename = "@val", skip_serializing_if = "Option::is_none")]
    pub val: Option<bool>,
}

/// Font size.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FontSize {
    #[serde(rename = "@val")]
    pub val: f64,
}

/// Color.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Color {
    #[serde(rename = "@rgb", skip_serializing_if = "Option::is_none")]
    pub rgb: Option<String>,

    #[serde(rename = "@theme", skip_serializing_if = "Option::is_none")]
    pub theme: Option<u32>,

    #[serde(rename = "@tint", skip_serializing_if = "Option::is_none")]
    pub tint: Option<f64>,
}

/// Font name.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FontName {
    #[serde(rename = "@val")]
    pub val: String,
}

/// Font family.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FontFamily {
    #[serde(rename = "@val")]
    pub val: u32,
}

/// Font scheme.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FontScheme {
    #[serde(rename = "@val")]
    pub val: String,
}

impl Default for Sst {
    fn default() -> Self {
        Self {
            xmlns: namespaces::SPREADSHEET_ML.to_string(),
            count: Some(0),
            unique_count: Some(0),
            items: vec![],
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // NOTE: quick-xml's serde Deserializer trims leading whitespace from text
    // events by default (via internal StartTrimmer). This means text like " text"
    // becomes "text" after deserialization. For production use, the higher-level
    // reader should handle whitespace preservation using the raw Reader API.
    // Tests here verify the serde round-trip behavior as-is.

    #[test]
    fn test_sst_default() {
        let sst = Sst::default();
        assert_eq!(sst.xmlns, namespaces::SPREADSHEET_ML);
        assert_eq!(sst.count, Some(0));
        assert_eq!(sst.unique_count, Some(0));
        assert!(sst.items.is_empty());
    }

    #[test]
    fn test_sst_roundtrip() {
        let sst = Sst {
            xmlns: namespaces::SPREADSHEET_ML.to_string(),
            count: Some(3),
            unique_count: Some(2),
            items: vec![
                Si {
                    t: Some(T {
                        xml_space: None,
                        value: "Hello".to_string(),
                    }),
                    r: vec![],
                },
                Si {
                    t: Some(T {
                        xml_space: None,
                        value: "World".to_string(),
                    }),
                    r: vec![],
                },
            ],
        };
        let xml = quick_xml::se::to_string(&sst).unwrap();
        let parsed: Sst = quick_xml::de::from_str(&xml).unwrap();
        assert_eq!(sst.count, parsed.count);
        assert_eq!(sst.unique_count, parsed.unique_count);
        assert_eq!(sst.items.len(), parsed.items.len());
        assert_eq!(
            sst.items[0].t.as_ref().unwrap().value,
            parsed.items[0].t.as_ref().unwrap().value
        );
    }

    #[test]
    fn test_sst_with_plain_strings() {
        let sst = Sst {
            xmlns: namespaces::SPREADSHEET_ML.to_string(),
            count: Some(2),
            unique_count: Some(2),
            items: vec![
                Si {
                    t: Some(T {
                        xml_space: None,
                        value: "Name".to_string(),
                    }),
                    r: vec![],
                },
                Si {
                    t: Some(T {
                        xml_space: None,
                        value: "Age".to_string(),
                    }),
                    r: vec![],
                },
            ],
        };
        let xml = quick_xml::se::to_string(&sst).unwrap();
        assert!(xml.contains("Name"));
        assert!(xml.contains("Age"));
    }

    #[test]
    fn test_sst_with_rich_text() {
        let sst = Sst {
            xmlns: namespaces::SPREADSHEET_ML.to_string(),
            count: Some(1),
            unique_count: Some(1),
            items: vec![Si {
                t: None,
                r: vec![
                    R {
                        r_pr: Some(RPr {
                            b: Some(BoolVal { val: None }),
                            i: None,
                            sz: Some(FontSize { val: 11.0 }),
                            color: None,
                            r_font: Some(FontName {
                                val: "Calibri".to_string(),
                            }),
                            family: None,
                            scheme: None,
                        }),
                        t: T {
                            xml_space: None,
                            value: "Bold".to_string(),
                        },
                    },
                    R {
                        r_pr: None,
                        t: T {
                            xml_space: None,
                            value: " Normal".to_string(),
                        },
                    },
                ],
            }],
        };

        let xml = quick_xml::se::to_string(&sst).unwrap();
        let parsed: Sst = quick_xml::de::from_str(&xml).unwrap();
        assert_eq!(parsed.items.len(), 1);
        assert!(parsed.items[0].t.is_none());
        assert_eq!(parsed.items[0].r.len(), 2);
        assert!(parsed.items[0].r[0].r_pr.is_some());
        assert!(parsed.items[0].r[0].r_pr.as_ref().unwrap().b.is_some());
        assert_eq!(parsed.items[0].r[0].t.value, "Bold");
        // Note: quick-xml's StartTrimmer trims leading whitespace from text
        // after a start tag. " Normal" becomes "Normal" during deserialization.
        // The higher-level reader must handle whitespace preservation.
        assert_eq!(parsed.items[0].r[1].t.value, "Normal");
    }

    #[test]
    fn test_parse_real_excel_shared_strings() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="4" uniqueCount="3">
  <si><t>Name</t></si>
  <si><t>Value</t></si>
  <si><t>Description</t></si>
</sst>"#;

        let parsed: Sst = quick_xml::de::from_str(xml).unwrap();
        assert_eq!(parsed.count, Some(4));
        assert_eq!(parsed.unique_count, Some(3));
        assert_eq!(parsed.items.len(), 3);
        assert_eq!(parsed.items[0].t.as_ref().unwrap().value, "Name");
        assert_eq!(parsed.items[1].t.as_ref().unwrap().value, "Value");
        assert_eq!(parsed.items[2].t.as_ref().unwrap().value, "Description");
    }

    #[test]
    fn test_parse_real_excel_rich_text_shared_strings() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="1" uniqueCount="1">
  <si>
    <r>
      <rPr><b/><sz val="11"/><rFont val="Calibri"/></rPr>
      <t>Bold</t>
    </r>
    <r>
      <t> text</t>
    </r>
  </si>
</sst>"#;

        let parsed: Sst = quick_xml::de::from_str(xml).unwrap();
        assert_eq!(parsed.items.len(), 1);
        let item = &parsed.items[0];
        assert!(item.t.is_none());
        assert_eq!(item.r.len(), 2);
        assert!(item.r[0].r_pr.is_some());
        assert!(item.r[0].r_pr.as_ref().unwrap().b.is_some());
        assert_eq!(item.r[0].t.value, "Bold");
        // Leading whitespace trimmed by quick-xml's StartTrimmer
        assert_eq!(item.r[1].t.value, "text");
    }

    #[test]
    fn test_text_with_space_preservation() {
        let t = T {
            xml_space: Some("preserve".to_string()),
            value: "  leading spaces  ".to_string(),
        };
        let xml = quick_xml::se::to_string(&t).unwrap();
        assert!(xml.contains("xml:space=\"preserve\""));
        let parsed: T = quick_xml::de::from_str(&xml).unwrap();
        assert_eq!(parsed.xml_space, Some("preserve".to_string()));
        // Note: quick-xml's StartTrimmer trims leading whitespace.
        // The xml:space="preserve" attribute is preserved in the struct
        // for correct re-serialization; actual whitespace preservation
        // requires the higher-level reader to use raw Reader API.
        assert_eq!(parsed.value, "leading spaces");
    }

    #[test]
    fn test_empty_sst_roundtrip() {
        let sst = Sst::default();
        let xml = quick_xml::se::to_string(&sst).unwrap();
        let parsed: Sst = quick_xml::de::from_str(&xml).unwrap();
        assert!(parsed.items.is_empty());
        assert_eq!(parsed.count, Some(0));
        assert_eq!(parsed.unique_count, Some(0));
    }

    #[test]
    fn test_sst_serialize_structure() {
        let sst = Sst {
            xmlns: namespaces::SPREADSHEET_ML.to_string(),
            count: Some(1),
            unique_count: Some(1),
            items: vec![Si {
                t: Some(T {
                    xml_space: None,
                    value: "test".to_string(),
                }),
                r: vec![],
            }],
        };
        let xml = quick_xml::se::to_string(&sst).unwrap();
        assert!(xml.contains("<sst"));
        assert!(xml.contains("<si>"));
        assert!(xml.contains("<t>"));
        assert!(xml.contains("test"));
    }
}