Skip to main content

fiscal_core/
xml_utils.rs

1//! Low-level XML building primitives used throughout the crate.
2//!
3//! These utilities are deliberately simple and allocation-efficient: they work
4//! on `&str` slices and return owned `String`s, with no external XML library
5//! dependency.
6
/// Replace the five XML-reserved characters with their predefined entities.
///
/// `&`, `<`, `>`, `"` and `'` become `&amp;`, `&lt;`, `&gt;`, `&quot;` and
/// `&apos;` respectively; every other character is copied through unchanged.
/// Intended for both text content and attribute values.
///
/// # Examples
///
/// ```
/// use fiscal_core::xml_utils::escape_xml;
/// assert_eq!(escape_xml("Tom & Jerry <cats>"), "Tom &amp; Jerry &lt;cats&gt;");
/// ```
pub fn escape_xml(s: &str) -> String {
    // The output is never shorter than the input, so the input length is a
    // sensible initial capacity.
    let mut escaped = String::with_capacity(s.len());
    for current in s.chars() {
        let entity = match current {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            '\'' => "&apos;",
            plain => {
                // Not a reserved character: copy it verbatim.
                escaped.push(plain);
                continue;
            }
        };
        escaped.push_str(entity);
    }
    escaped
}
30
/// Return the inner text of the first `<tag_name>…</tag_name>` pair found in
/// a raw XML string.
///
/// The match is purely textual: the opening tag must appear exactly as
/// `<tag_name>` (no attributes, no namespace prefix), and the content ends at
/// the first following `</tag_name>`. Nested elements of the same name and
/// CDATA sections are therefore not handled.
///
/// Returns `None` when either the opening tag or a subsequent closing tag is
/// absent.
///
/// # Examples
///
/// ```
/// use fiscal_core::xml_utils::extract_xml_tag_value;
/// let xml = "<root><cStat>100</cStat></root>";
/// assert_eq!(extract_xml_tag_value(xml, "cStat"), Some("100".to_string()));
/// assert_eq!(extract_xml_tag_value(xml, "missing"), None);
/// ```
pub fn extract_xml_tag_value(xml: &str, tag_name: &str) -> Option<String> {
    let opening = format!("<{tag_name}>");
    let closing = format!("</{tag_name}>");

    // Everything after the first opening tag...
    let (_, after_opening) = xml.split_once(opening.as_str())?;
    // ...up to (but excluding) the first closing tag that follows it.
    let (inner, _) = after_opening.split_once(closing.as_str())?;

    Some(inner.to_owned())
}
54
55/// Build an XML tag with optional attributes and children.
56///
57/// If children is a string, it is escaped. If children is an array
58/// of pre-built strings, they are concatenated as-is.
59pub fn tag(name: &str, attrs: &[(&str, &str)], children: TagContent<'_>) -> String {
60    let attr_str: String = attrs
61        .iter()
62        .map(|(k, v)| format!(" {k}=\"{}\"", escape_xml(v)))
63        .collect();
64
65    match children {
66        TagContent::None => format!("<{name}{attr_str}></{name}>"),
67        TagContent::Text(text) => {
68            format!("<{name}{attr_str}>{}</{name}>", escape_xml(text))
69        }
70        TagContent::Children(kids) => {
71            let inner: String = kids.into_iter().collect();
72            format!("<{name}{attr_str}>{inner}</{name}>")
73        }
74    }
75}
76
/// Content variants for the [`tag`] builder function.
///
/// Use [`TagContent::None`] for an empty element (rendered as an explicit
/// open/close pair, `<name></name>`, not as a self-closing tag),
/// [`TagContent::Text`] for text nodes (automatically XML-escaped), and
/// [`TagContent::Children`] for pre-built child element strings.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum TagContent<'a> {
    /// Empty element: `<name></name>`.
    None,
    /// Text content (will be XML-escaped): `<name>text</name>`.
    Text(&'a str),
    /// Pre-built child elements concatenated verbatim: `<name><a/><b/></name>`.
    Children(Vec<String>),
}

impl<'a> From<&'a str> for TagContent<'a> {
    /// Borrowed text becomes [`TagContent::Text`] and is escaped on render.
    fn from(s: &'a str) -> Self {
        TagContent::Text(s)
    }
}

impl From<Vec<String>> for TagContent<'_> {
    /// A list of pre-built fragments becomes [`TagContent::Children`].
    fn from(v: Vec<String>) -> Self {
        TagContent::Children(v)
    }
}

impl From<String> for TagContent<'_> {
    /// Convert owned text into tag content without leaking it.
    ///
    /// [`TagContent::Text`] can only hold a borrow, so an owned `String`
    /// cannot be stored there without giving it a `'static` lifetime (the
    /// previous implementation did so via `Box::leak`, permanently leaking
    /// every converted string). Instead the text is escaped here and stored
    /// as a single pre-built fragment in [`TagContent::Children`], which is
    /// emitted verbatim and therefore renders exactly like escaped text.
    ///
    /// Note that the resulting variant is `Children`, not `Text`.
    fn from(s: String) -> Self {
        // Same entity mapping as `escape_xml`; `&` must be replaced first so
        // the ampersands introduced by the other entities are not re-escaped.
        let escaped = s
            .replace('&', "&amp;")
            .replace('<', "&lt;")
            .replace('>', "&gt;")
            .replace('"', "&quot;")
            .replace('\'', "&apos;");
        TagContent::Children(vec![escaped])
    }
}
109
/// Pretty-print an XML string by adding indentation.
///
/// This is a lightweight formatter that does not parse XML semantically --
/// it splits the input on `<` / `>` boundaries and inserts newlines and
/// two-space indentation. Suitable for debugging/display purposes. Equivalent
/// to the PHP `FakePretty::prettyPrint` formatting behaviour (via
/// DOMDocument::formatOutput).
///
/// Formatting rules:
/// - `<?…?>` declarations are written on their own line without indentation.
/// - Comments (`<!-- … -->`) and other `<!…>` markup such as `<!DOCTYPE …>`
///   are written at the current depth and never change the nesting depth.
/// - `<![CDATA[ … ]]>` sections are treated as character data, so
///   `<a><![CDATA[x]]></a>` stays on one line.
/// - `<tag>text</tag>` is kept on a single line.
/// - Whitespace-only text between tags is dropped; other text is trimmed.
///
/// Limitations: a `>` inside an attribute value or a DOCTYPE internal subset
/// is not recognised and will split the tag early.
///
/// # Examples
///
/// ```
/// use fiscal_core::xml_utils::pretty_print_xml;
/// let compact = "<root><child>text</child></root>";
/// let pretty = pretty_print_xml(compact);
/// assert!(pretty.contains("  <child>"));
/// ```
pub fn pretty_print_xml(xml: &str) -> String {
    let tokens = tokenize_xml(xml);
    let mut out = String::with_capacity(xml.len() * 2);
    let mut depth: usize = 0;
    let mut idx = 0;

    while idx < tokens.len() {
        match &tokens[idx] {
            XmlToken::Tag(decl) if decl.starts_with("<?") => {
                // XML declaration / processing instruction: never indented.
                out.push_str(decl);
            }
            XmlToken::Tag(markup) if markup.starts_with("<!") => {
                // Comment or DOCTYPE: a leaf that must not affect nesting.
                push_indented(&mut out, depth, markup);
            }
            XmlToken::Tag(close) if close.starts_with("</") => {
                // Closing tag: dedent first (never below zero on malformed input).
                depth = depth.saturating_sub(1);
                push_indented(&mut out, depth, close);
            }
            XmlToken::Tag(leaf) if leaf.ends_with("/>") => {
                // Self-closing tag.
                push_indented(&mut out, depth, leaf);
            }
            XmlToken::Tag(open) => {
                // Opening tag. `<tag>text</tag>` is rendered inline.
                if let (Some(XmlToken::Text(text)), Some(XmlToken::Tag(close))) =
                    (tokens.get(idx + 1), tokens.get(idx + 2))
                {
                    if close.starts_with("</") {
                        push_indented(&mut out, depth, open);
                        out.push_str(text);
                        out.push_str(close);
                        out.push('\n');
                        idx += 3;
                        continue;
                    }
                }
                push_indented(&mut out, depth, open);
                depth += 1;
            }
            XmlToken::Text(text) => {
                // Standalone text (mixed content; unusual in fiscal documents).
                push_indented(&mut out, depth, text);
            }
        }
        out.push('\n');
        idx += 1;
    }

    // Remove trailing newline(s).
    while out.ends_with('\n') {
        out.pop();
    }
    out
}

/// Split raw XML into tag and text tokens for [`pretty_print_xml`].
///
/// Comments run to `-->` and CDATA sections to `]]>`; every other tag runs to
/// the next `>`. An unterminated construct extends to the end of the input.
fn tokenize_xml(xml: &str) -> Vec<XmlToken> {
    const CDATA_OPEN: &str = "<![CDATA[";

    let mut tokens = Vec::new();
    let mut rest = xml;

    while !rest.is_empty() {
        if rest.starts_with('<') {
            let is_cdata = rest.starts_with(CDATA_OPEN);
            let terminator = if rest.starts_with("<!--") {
                "-->"
            } else if is_cdata {
                "]]>"
            } else {
                ">"
            };
            let end = rest
                .find(terminator)
                .map_or(rest.len(), |at| at + terminator.len());
            let (piece, tail) = rest.split_at(end);
            tokens.push(if is_cdata {
                // CDATA is character data; keep it verbatim (untrimmed).
                XmlToken::Text(piece.to_string())
            } else {
                XmlToken::Tag(piece.to_string())
            });
            rest = tail;
        } else {
            // Text until the next '<'.
            let end = rest.find('<').unwrap_or(rest.len());
            let (piece, tail) = rest.split_at(end);
            let text = piece.trim();
            if !text.is_empty() {
                tokens.push(XmlToken::Text(text.to_string()));
            }
            rest = tail;
        }
    }
    tokens
}

/// Append `piece` to `out`, preceded by `depth` levels of two-space indentation.
fn push_indented(out: &mut String, depth: usize, piece: &str) {
    const INDENT: &str = "  ";
    for _ in 0..depth {
        out.push_str(INDENT);
    }
    out.push_str(piece);
}

/// Internal token type for XML pretty-printing.
enum XmlToken {
    /// Markup delimited by `<` … `>`, including the delimiters.
    Tag(String),
    /// Character data between tags (trimmed), or a verbatim CDATA section.
    Text(String),
}
232
233/// Validate an NF-e XML string by checking for the presence of required tags.
234///
235/// This is a lightweight structural validator that checks for mandatory tags
236/// in the NF-e/NFC-e XML. It does **not** perform full XSD schema validation
237/// (which would require shipping XSD files and a full XML schema parser), but
238/// covers the most common errors that would cause SEFAZ rejection.
239///
240/// Validated items:
241/// - Required root structure (`<NFe>`, `<infNFe>`)
242/// - Required `<ide>` fields (cUF, cNF, natOp, mod, serie, nNF, dhEmi, tpNF, etc.)
243/// - Required `<emit>` fields (CNPJ/CPF, xNome, enderEmit, IE, CRT)
244/// - Required `<det>` with at least one item
245/// - Required `<total>` / `<ICMSTot>`
246/// - Required `<transp>` and `<pag>`
247/// - Access key format (44 digits)
248///
249/// # Errors
250///
251/// Returns [`FiscalError::XmlParsing`] with a description of all missing tags.
252///
253/// # Examples
254///
255/// ```
256/// use fiscal_core::xml_utils::validate_xml;
257/// let xml = "<NFe><infNFe>...</infNFe></NFe>";
258/// // Will return an error listing all missing required tags
259/// assert!(validate_xml(xml).is_err());
260/// ```
261pub fn validate_xml(xml: &str) -> Result<(), crate::FiscalError> {
262    let mut errors: Vec<String> = Vec::new();
263
264    // Check root structure
265    let required_structure = [
266        ("NFe", "Elemento raiz <NFe> ausente"),
267        ("infNFe", "Elemento <infNFe> ausente"),
268    ];
269    for (tag_name, msg) in &required_structure {
270        if !xml.contains(&format!("<{tag_name}")) {
271            errors.push(msg.to_string());
272        }
273    }
274
275    // Check IDE required tags
276    let ide_tags = [
277        "cUF", "cNF", "natOp", "mod", "serie", "nNF", "dhEmi", "tpNF", "idDest", "cMunFG", "tpImp",
278        "tpEmis", "cDV", "tpAmb", "finNFe", "indFinal", "indPres", "procEmi", "verProc",
279    ];
280    for tag_name in &ide_tags {
281        if extract_xml_tag_value(xml, tag_name).is_none() {
282            errors.push(format!("Tag obrigatória <{tag_name}> ausente em <ide>"));
283        }
284    }
285
286    // Check emit required tags
287    let emit_required = ["xNome", "IE", "CRT"];
288    for tag_name in &emit_required {
289        if extract_xml_tag_value(xml, tag_name).is_none() {
290            errors.push(format!("Tag obrigatória <{tag_name}> ausente em <emit>"));
291        }
292    }
293    // CNPJ or CPF must be present
294    if extract_xml_tag_value(xml, "CNPJ").is_none() && extract_xml_tag_value(xml, "CPF").is_none() {
295        errors.push("Tag <CNPJ> ou <CPF> ausente em <emit>".to_string());
296    }
297
298    // Check required blocks
299    let required_blocks = [
300        ("enderEmit", "Bloco <enderEmit> ausente"),
301        ("det ", "Nenhum item <det> encontrado"),
302        ("total", "Bloco <total> ausente"),
303        ("ICMSTot", "Bloco <ICMSTot> ausente"),
304        ("transp", "Bloco <transp> ausente"),
305        ("pag", "Bloco <pag> ausente"),
306    ];
307    for (fragment, msg) in &required_blocks {
308        if !xml.contains(&format!("<{fragment}")) {
309            errors.push(msg.to_string());
310        }
311    }
312
313    // Validate access key format (44 digits) from infNFe Id attribute
314    if let Some(id_start) = xml.find("Id=\"NFe") {
315        let after_id = &xml[id_start + 7..];
316        if let Some(quote_end) = after_id.find('"') {
317            let key = &after_id[..quote_end];
318            if key.len() != 44 || !key.chars().all(|c| c.is_ascii_digit()) {
319                errors.push(format!(
320                    "Chave de acesso inválida: esperado 44 dígitos, encontrado '{key}'"
321                ));
322            }
323        }
324    }
325
326    if errors.is_empty() {
327        Ok(())
328    } else {
329        Err(crate::FiscalError::XmlParsing(errors.join("; ")))
330    }
331}
332
// Unit tests for the pretty-printer and the structural NF-e validator.
#[cfg(test)]
mod tests {
    use super::*;

    // A text-only child is kept on one line and indented one level.
    #[test]
    fn pretty_print_simple_xml() {
        let compact = "<root><child>text</child></root>";
        let pretty = pretty_print_xml(compact);
        assert!(pretty.contains("<root>"));
        assert!(pretty.contains("  <child>text</child>"));
        assert!(pretty.contains("</root>"));
    }

    // Each nesting level adds two spaces; closing tags align with their opener.
    #[test]
    fn pretty_print_nested_xml() {
        let compact = "<a><b><c>val</c></b></a>";
        let pretty = pretty_print_xml(compact);
        let lines: Vec<&str> = pretty.lines().collect();
        assert_eq!(lines[0], "<a>");
        assert_eq!(lines[1], "  <b>");
        assert_eq!(lines[2], "    <c>val</c>");
        assert_eq!(lines[3], "  </b>");
        assert_eq!(lines[4], "</a>");
    }

    // The XML declaration stays first and does not affect indentation.
    #[test]
    fn pretty_print_with_declaration() {
        let xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><root><a>1</a></root>";
        let pretty = pretty_print_xml(xml);
        assert!(pretty.starts_with("<?xml"));
        assert!(pretty.contains("  <a>1</a>"));
    }

    // Empty input yields empty output.
    #[test]
    fn pretty_print_empty_input() {
        let pretty = pretty_print_xml("");
        assert_eq!(pretty, "");
    }

    // Minimal document containing every tag the validator requires, with a
    // 44-digit access key in the `Id` attribute.
    #[test]
    fn validate_xml_valid_nfe() {
        let xml = concat!(
            r#"<NFe><infNFe versao="4.00" Id="NFe41260304123456000190550010000001231123456780">"#,
            "<ide><cUF>41</cUF><cNF>12345678</cNF><natOp>VENDA</natOp>",
            "<mod>55</mod><serie>1</serie><nNF>123</nNF>",
            "<dhEmi>2026-03-11T10:30:00-03:00</dhEmi>",
            "<tpNF>1</tpNF><idDest>1</idDest><cMunFG>4106902</cMunFG>",
            "<tpImp>1</tpImp><tpEmis>1</tpEmis><cDV>0</cDV>",
            "<tpAmb>2</tpAmb><finNFe>1</finNFe><indFinal>1</indFinal>",
            "<indPres>1</indPres><procEmi>0</procEmi><verProc>1.0</verProc></ide>",
            "<emit><CNPJ>04123456000190</CNPJ><xNome>Test</xNome>",
            "<enderEmit><xLgr>Rua</xLgr></enderEmit>",
            "<IE>9012345678</IE><CRT>3</CRT></emit>",
            "<det nItem=\"1\"><prod><cProd>001</cProd></prod></det>",
            "<total><ICMSTot><vNF>150.00</vNF></ICMSTot></total>",
            "<transp><modFrete>9</modFrete></transp>",
            "<pag><detPag><tPag>01</tPag><vPag>150.00</vPag></detPag></pag>",
            "</infNFe></NFe>",
        );
        assert!(validate_xml(xml).is_ok());
    }

    // A document with none of the NF-e structure reports the missing root
    // elements in the error message.
    #[test]
    fn validate_xml_missing_tags() {
        let xml = "<root><something>val</something></root>";
        let err = validate_xml(xml).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("NFe"));
        assert!(msg.contains("infNFe"));
    }

    // Same document as the valid case, but the `Id` attribute carries only
    // three digits, so the access-key check must fail.
    #[test]
    fn validate_xml_invalid_access_key() {
        let xml = concat!(
            r#"<NFe><infNFe versao="4.00" Id="NFe123">"#,
            "<ide><cUF>41</cUF><cNF>12345678</cNF><natOp>VENDA</natOp>",
            "<mod>55</mod><serie>1</serie><nNF>123</nNF>",
            "<dhEmi>2026-03-11T10:30:00-03:00</dhEmi>",
            "<tpNF>1</tpNF><idDest>1</idDest><cMunFG>4106902</cMunFG>",
            "<tpImp>1</tpImp><tpEmis>1</tpEmis><cDV>0</cDV>",
            "<tpAmb>2</tpAmb><finNFe>1</finNFe><indFinal>1</indFinal>",
            "<indPres>1</indPres><procEmi>0</procEmi><verProc>1.0</verProc></ide>",
            "<emit><CNPJ>04123456000190</CNPJ><xNome>Test</xNome>",
            "<enderEmit><xLgr>Rua</xLgr></enderEmit>",
            "<IE>9012345678</IE><CRT>3</CRT></emit>",
            "<det nItem=\"1\"><prod><cProd>001</cProd></prod></det>",
            "<total><ICMSTot><vNF>150.00</vNF></ICMSTot></total>",
            "<transp><modFrete>9</modFrete></transp>",
            "<pag><detPag><tPag>01</tPag><vPag>150.00</vPag></detPag></pag>",
            "</infNFe></NFe>",
        );
        let err = validate_xml(xml).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("Chave de acesso"));
    }
}