// vexy-vsvg-plugin-sdk 2.4.2
//
// Plugin SDK for vexy-vsvg
// Documentation
// this_file: crates/vexy-vsvg-plugin-sdk/src/plugins/remove_doctype.rs

//! Remove DOCTYPE plugin implementation
//!
//! This plugin removes DOCTYPE declarations from SVG documents. DOCTYPE declarations
//! are unnecessary in modern SVG and were officially discouraged by the SVG Working Group.
//!
//! ## What It Removes
//!
//! All `<!DOCTYPE ...>` declarations, including:
//! - `<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://...">`
//! - `<!DOCTYPE svg SYSTEM "custom.dtd">`
//! - Any malformed or misplaced DOCTYPE nodes
//!
//! ## Why This Is Safe
//!
//! According to the SVG Working Group:
//!
//! > "The SVG DTDs are a source of so many issues that the SVG WG has decided not to write
//! > one for the upcoming SVG 1.2 standard. In fact SVG WG members are even telling people
//! > not to use a DOCTYPE declaration in SVG 1.0 and 1.1 documents."
//!
//! Reference: <https://jwatt.org/svg/authoring/#doctype-declaration>
//!
//! Modern browsers parse SVG in XML mode without requiring a DOCTYPE. The DOCTYPE adds
//! parsing overhead and can cause validation issues when SVG contains modern features
//! not defined in the old DTDs.
//!
//! ## Configuration
//!
//! This plugin accepts no configuration parameters.
//!
//! ## Example
//!
//! Before:
//! ```xml
//! <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
//!   "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
//! <svg>
//!   <rect/>
//! </svg>
//! ```
//!
//! After:
//! ```xml
//! <svg>
//!   <rect/>
//! </svg>
//! ```
//!
//! ## SVGO Compatibility
//!
//! Ports SVGO's `removeDoctype` plugin. Matches the same behavior.
//!
//! Reference: <https://github.com/svg/svgo/blob/main/plugins/removeDoctype.js>

use anyhow::Result;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use vexy_vsvg::ast::{Document, Node};

use crate::Plugin;

/// Options for the `removeDoctype` plugin.
///
/// Intentionally empty: there is nothing to configure. The type is kept so that this
/// plugin exposes the same API shape as the other plugins.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RemoveDoctypeConfig {
    // Deliberately field-less - matches SVGO behavior
}

/// Plugin that removes DOCTYPE declarations from SVG documents
///
/// Removes all `<!DOCTYPE>` nodes from the document, primarily from the prologue
/// (before the root `<svg>` element) but also handles misplaced DOCTYPE nodes
/// that may appear elsewhere.
#[derive(Debug, Clone)]
pub struct RemoveDoctypePlugin {
    // The plugin has no options, so nothing ever reads this field; it is stored only
    // so the constructors mirror those of configurable plugins.
    #[allow(dead_code)]
    config: RemoveDoctypeConfig,
}

impl RemoveDoctypePlugin {
    /// Check if a text node contains only formatting whitespace
    ///
    /// Returns `true` when `text` is blank after trimming AND contains at least one
    /// newline, carriage return, or tab. Blank text made only of spaces (and the empty
    /// string) returns `false`, so single separator spaces are never treated as
    /// source formatting.
    fn is_formatting_whitespace(text: &str) -> bool {
        text.trim().is_empty() && text.chars().any(|c| matches!(c, '\n' | '\r' | '\t'))
    }

    /// Remove formatting-only whitespace text nodes from a node list
    ///
    /// Text nodes for which [`Self::is_formatting_whitespace`] holds are dropped, but
    /// only when the list also holds real content. A list consisting solely of blank
    /// text nodes is left untouched.
    ///
    /// Everything except blank text counts as real content: elements, non-blank text,
    /// comments, processing instructions, DOCTYPE nodes, and CDATA sections.
    fn cleanup_formatting_whitespace(nodes: &mut Vec<Node>) {
        // The match is kept exhaustive on purpose: adding a `Node` variant must force
        // a decision here instead of silently falling into a default.
        let has_non_whitespace_content = nodes.iter().any(|node| match node {
            Node::Element(_) => true,
            Node::Text(text) => !text.trim().is_empty(),
            Node::Comment(_)
            | Node::ProcessingInstruction { .. }
            | Node::DocType(_)
            | Node::CData(_) => true,
        });

        if has_non_whitespace_content {
            nodes.retain(
                |node| !matches!(node, Node::Text(text) if Self::is_formatting_whitespace(text)),
            );
        }
    }

    /// Recursively clean up formatting whitespace in the document tree
    ///
    /// For every element that is not a text-rendering element:
    ///
    /// 1. text children containing a newline, carriage return, or tab are trimmed,
    ///    unless trimming would leave them empty;
    /// 2. formatting-only whitespace children are removed via
    ///    [`Self::cleanup_formatting_whitespace`].
    ///
    /// Children of `text`, `tspan`, `tref`, `textPath`, and `altGlyph` are left exactly
    /// as they are, because whitespace between inline text runs is rendered there.
    /// The decision is made per element by name and is not inherited by descendants;
    /// recursion continues into all child elements either way.
    fn cleanup_formatting_whitespace_recursive(element: &mut vexy_vsvg::ast::Element<'_>) {
        // Check if this is a text-rendering element where whitespace is semantic
        let preserve = matches!(
            element.name.as_ref(),
            "text" | "tspan" | "tref" | "textPath" | "altGlyph"
        );

        if !preserve {
            // Trim text nodes that carry source-formatting characters
            for child in &mut element.children {
                if let Node::Text(text) = child {
                    if text.contains('\n') || text.contains('\r') || text.contains('\t') {
                        let trimmed = text.trim();
                        // Blank nodes are left for the removal pass below
                        if !trimmed.is_empty() {
                            *text = trimmed.into();
                        }
                    }
                }
            }

            // Remove formatting-only whitespace nodes. This must not run for
            // text-rendering elements: a whitespace-only node between two `<tspan>`s
            // is the rendered separator between them.
            Self::cleanup_formatting_whitespace(&mut element.children);
        }

        // Recurse into child elements
        for child in &mut element.children {
            if let Node::Element(child_element) = child {
                Self::cleanup_formatting_whitespace_recursive(child_element);
            }
        }
    }

    /// Create a new RemoveDoctypePlugin with default configuration
    pub fn new() -> Self {
        Self {
            config: RemoveDoctypeConfig::default(),
        }
    }

    /// Create a new RemoveDoctypePlugin with specific configuration
    ///
    /// The config parameter exists for API consistency but has no effect since
    /// this plugin requires no configuration.
    pub fn with_config(config: RemoveDoctypeConfig) -> Self {
        Self { config }
    }

    /// Parse configuration from JSON
    ///
    /// A JSON object is deserialized into [`RemoveDoctypeConfig`]; any other JSON value
    /// yields the default configuration.
    ///
    /// # Errors
    ///
    /// Returns an error prefixed with `Invalid configuration:` if deserializing the
    /// object fails.
    fn _parse_config(params: &Value) -> Result<RemoveDoctypeConfig> {
        if !params.is_object() {
            return Ok(RemoveDoctypeConfig::default());
        }

        // `&Value` is itself a serde `Deserializer`, so no clone of the JSON is needed
        RemoveDoctypeConfig::deserialize(params)
            .map_err(|e| anyhow::anyhow!("Invalid configuration: {}", e))
    }
}

impl Default for RemoveDoctypePlugin {
    fn default() -> Self {
        Self::new()
    }
}

impl Plugin for RemoveDoctypePlugin {
    fn name(&self) -> &'static str {
        "removeDoctype"
    }

    fn description(&self) -> &'static str {
        "removes doctype declaration"
    }

    fn validate_params(&self, params: &Value) -> Result<()> {
        if let Some(obj) = params.as_object() {
            if !obj.is_empty() {
                return Err(anyhow::anyhow!(
                    "removeDoctype plugin does not accept any parameters"
                ));
            }
        }
        Ok(())
    }

    /// Apply DOCTYPE removal to the entire SVG document
    ///
    /// Processes multiple locations where DOCTYPE nodes may appear:
    ///
    /// 1. **Prologue** (primary location) - DOCTYPE normally appears here before `<svg>`
    /// 2. **Root children** (defensive) - Handles malformed SVG with misplaced DOCTYPE
    /// 3. **Epilogue** (defensive) - Cleanup for completeness
    ///
    /// After removing DOCTYPE nodes, cleans up any leftover formatting whitespace
    /// to prevent visual artifacts in the output.
    fn apply(&self, document: &mut Document) -> Result<()> {
        // Remove all DOCTYPE nodes from the document prologue (standard location)
        document
            .prologue
            .retain(|child| !matches!(child, Node::DocType(_)));
        Self::cleanup_formatting_whitespace(&mut document.prologue);

        // Also remove DOCTYPE nodes from root children (defensive: handles misplaced DOCTYPEs)
        document
            .root
            .children
            .retain(|child| !matches!(child, Node::DocType(_)));
        Self::cleanup_formatting_whitespace_recursive(&mut document.root);

        // Clean up epilogue as well (completeness)
        Self::cleanup_formatting_whitespace(&mut document.epilogue);

        Ok(())
    }
}

#[cfg(test)]
mod unit_tests {
    use std::borrow::Cow;

    use serde_json::json;
    use vexy_vsvg::ast::{Document, Element, Node};

    use super::*;

    /// SVG 1.1 DOCTYPE payload with public and system identifiers.
    const SVG11_DOCTYPE: &str = "svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\"";

    /// XHTML 1.0 Transitional DOCTYPE payload, used as a second, unrelated DOCTYPE.
    const XHTML_DOCTYPE: &str = "html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"";

    /// SVG 1.1 DOCTYPE payload carrying only the public identifier.
    const SVG11_PUBLIC_ONLY_DOCTYPE: &str = "svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"";

    /// Builds an element whose name is the given static string.
    fn create_element(name: &'static str) -> Element<'static> {
        let mut built = Element::new(name);
        built.name = Cow::Borrowed(name);
        built
    }

    /// Runs a freshly constructed plugin over `doc`, panicking if it reports an error.
    fn apply_plugin(doc: &mut Document) {
        RemoveDoctypePlugin::new()
            .apply(doc)
            .expect("removeDoctype apply failed");
    }

    #[test]
    fn test_plugin_creation() {
        let plugin = RemoveDoctypePlugin::new();

        assert_eq!(plugin.name(), "removeDoctype");
        assert_eq!(plugin.description(), "removes doctype declaration");
    }

    #[test]
    fn test_parameter_validation() {
        let plugin = RemoveDoctypePlugin::new();

        // An empty object is the only object shape that is accepted
        let empty = json!({});
        assert!(plugin.validate_params(&empty).is_ok());

        // Any key at all is rejected
        let with_key = json!({"param": "value"});
        assert!(plugin.validate_params(&with_key).is_err());
    }

    #[test]
    fn test_remove_doctype() {
        let mut doc = Document::new();
        doc.root.children.push(Node::DocType(SVG11_DOCTYPE.into()));
        doc.root
            .children
            .push(Node::Element(create_element("svg")));

        apply_plugin(&mut doc);

        // Only the element is left
        assert!(matches!(
            doc.root.children.as_slice(),
            [Node::Element(_)]
        ));
    }

    #[test]
    fn test_multiple_doctypes() {
        let mut doc = Document::new();
        doc.root.children.push(Node::DocType(SVG11_DOCTYPE.into()));
        doc.root.children.push(Node::DocType(XHTML_DOCTYPE.into()));
        doc.root
            .children
            .push(Node::Element(create_element("svg")));

        apply_plugin(&mut doc);

        // Both DOCTYPE nodes are gone, the element stays
        assert!(matches!(
            doc.root.children.as_slice(),
            [Node::Element(_)]
        ));
    }

    #[test]
    fn test_no_doctype() {
        let mut doc = Document::new();
        doc.root
            .children
            .push(Node::Element(create_element("svg")));

        apply_plugin(&mut doc);

        // Nothing to remove, so the single element is still there
        assert!(matches!(
            doc.root.children.as_slice(),
            [Node::Element(_)]
        ));
    }

    #[test]
    fn test_doctype_with_text() {
        let mut doc = Document::new();
        doc.root
            .children
            .push(Node::DocType(SVG11_PUBLIC_ONLY_DOCTYPE.into()));
        doc.root.children.push(Node::Text("Some text".into()));
        doc.root
            .children
            .push(Node::Element(create_element("svg")));

        apply_plugin(&mut doc);

        // The DOCTYPE is dropped; text and element keep their order
        assert!(matches!(
            doc.root.children.as_slice(),
            [Node::Text(_), Node::Element(_)]
        ));
    }

    #[test]
    fn test_config_parsing() {
        // The config has no fields, so successful parsing is all there is to verify
        let _config: RemoveDoctypeConfig = RemoveDoctypePlugin::_parse_config(&json!({})).unwrap();
    }
}

// Use parameterized testing framework for SVGO fixture tests
#[cfg(test)]
vexy_vsvg_test_utils::plugin_fixture_tests!(RemoveDoctypePlugin, "removeDoctype");