Skip to main content

netgauze_ipfix_code_generator/xml_parsers/
xml_common.rs

1// Copyright (C) 2022-present The NetGauze Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12// implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use crate::Xref;
17use regex::{Captures, Regex, Replacer};
18use roxmltree::{ExpandedName, Node};
19
/// XML namespace URI paired with local tag names (e.g. `xref`, `description`)
/// when matching elements of the parsed document.
pub const IANA_NAMESPACE: &str = "http://www.iana.org/assignments";
// NOTE(review): the marker strings below are not used in this part of the
// file; presumably the registry parsers compare entry names/descriptions
// against them -- confirm against callers.
/// The literal `"Unassigned"`.
pub const UNASSIGNED: &str = "Unassigned";
/// The literal `"Reserved"`.
pub const RESERVED: &str = "Reserved";
/// The literal `"Unknown"`.
pub const UNKNOWN: &str = "Unknown";
/// The literal `"Private"`.
pub const PRIVATE: &str = "Private";
/// The literal `"experimentation"` (note the lowercase spelling).
pub const EXPERIMENTATION: &str = "experimentation";
26
27pub struct RfcLinkSwapper;
28impl Replacer for RfcLinkSwapper {
29    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
30        dst.push_str("[RFC");
31        dst.push_str(&caps["RFCNUM"]);
32        dst.push_str("](https://datatracker.ietf.org/doc/rfc");
33        dst.push_str(&caps["RFCNUM"]);
34        dst.push(')');
35    }
36}
37
38pub struct HttpLinkSwapper;
39impl Replacer for HttpLinkSwapper {
40    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
41        dst.push('<');
42        dst.push_str(&caps["href"]);
43        dst.push('>');
44    }
45}
46
47/// Find descendant node by it's ID
48/// If multiple nodes with the same ID exists, the first one is returned
49pub(crate) fn find_node_by_id<'a, 'input>(
50    node: &'input Node<'a, 'input>,
51    id: &str,
52) -> Option<Node<'a, 'input>> {
53    node.descendants().find(|x| x.attribute("id") == Some(id))
54}
55
56/// Find descendant node with ID matching on a regex
57pub(crate) fn find_node_by_regex<'a, 'input>(
58    node: &'input Node<'a, 'input>,
59    regex: &Regex,
60) -> Option<Node<'a, 'input>> {
61    node.children().find(|x| {
62        !x.is_root()
63            && x.attribute("id")
64                .map(|id| regex.is_match(id))
65                .unwrap_or(false)
66    })
67}
68
69/// Return all children nodes with ID matching the regex (ignoring root node)
70pub(crate) fn find_nodes_by_regex<'a, 'input>(
71    node: &'input Node<'a, 'input>,
72    regex: &Regex,
73) -> Vec<Node<'a, 'input>> {
74    node.children()
75        .filter(|x| {
76            !x.is_root()
77                && x.attribute("id")
78                    .map(|id| regex.is_match(id))
79                    .unwrap_or(false)
80        })
81        .collect()
82}
83
84/// Get the text value of an XML node if applicable
85/// For example `<a>bb</a>` returns `Some("bb".to_string())`,
86/// while `<a><b/></a>` returns `None`
87pub fn get_string_child(node: &Node<'_, '_>, tag_name: ExpandedName<'_, '_>) -> Option<String> {
88    node.children()
89        .find(|x| x.tag_name() == tag_name)
90        .map(|x| x.text().map(|txt| txt.trim().to_string()))
91        .unwrap_or_default()
92}
93
94/// Parse tags such as `<xref type="rfc">rfc1233</xref>`
95pub fn parse_xref(node: &Node<'_, '_>) -> Vec<Xref> {
96    let children = node
97        .children()
98        .filter(|x| x.tag_name() == (IANA_NAMESPACE, "xref").into())
99        .collect::<Vec<_>>();
100    let mut xrefs = Vec::new();
101    for child in children {
102        let ty = child.attribute("type").map(ToString::to_string);
103        let data = child.attribute("data").map(ToString::to_string);
104        if let (Some(ty), Some(data)) = (ty, data) {
105            xrefs.push(Xref { ty, data });
106        }
107    }
108    xrefs
109}
110
/// Spell out a leading ASCII digit as an English word (`"3PC"` -> `"ThreePC"`).
///
/// Only the first character is examined. If it is not one of `0`-`9` (or the
/// string is empty) the input is returned unchanged as an owned `String`.
fn replace_first_numeric_char(value: &str) -> String {
    // Indexed by digit value.
    const DIGIT_WORDS: [&str; 10] = [
        "Zero", "One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine",
    ];

    let mut rest = value.chars();
    // `to_digit(10)` only accepts the ASCII digits '0'..='9'.
    match rest.next().and_then(|first| first.to_digit(10)) {
        Some(digit) => format!("{}{}", DIGIT_WORDS[digit as usize], rest.as_str()),
        None => value.to_string(),
    }
}
126
127/// Convert a description string to a usable enum type name
128/// Use e.g. for registries missing a name field where we need
129/// to use a (possibly complex) description string.
130///
131/// - removes line breaks and trimming
132/// - only selects text preceding any ":", useful for e.g. [IPFIX MPLS label type (Value 46)](https://www.iana.org/assignments/ipfix/ipfix.xhtml#ipfix-mpls-label-type)
133/// - removes ascii punctuation
134/// - removes spaces
135/// - replaces first numeric char (e.g. 3PC --> ThreePC)
136///
137/// TODO: feedback to Benoit
138pub fn xml_string_to_enum_type(input: &str) -> (usize, String) {
139    // Multiline --> one-line
140    let str_one_line = input
141        .lines()
142        .map(|line| line.trim())
143        .collect::<Vec<&str>>()
144        .join(" ")
145        .to_string();
146
147    // Select text before ":" if that's present
148    let str_before_column = str_one_line
149        .split(':')
150        .next()
151        .unwrap_or(&str_one_line)
152        .trim()
153        .to_string();
154
155    // Remove spaces
156    let str_words: Vec<&str> = str_before_column.split_whitespace().collect();
157    let str_words_amount = str_words.len();
158    let mut str_without_spaces = str_before_column
159        .chars()
160        .filter(|c| !c.is_whitespace() && !c.is_ascii_punctuation())
161        .collect::<String>();
162
163    // Replace first numeric char if we have one
164    if let Some(first_char) = str_without_spaces.chars().next()
165        && first_char.is_numeric()
166    {
167        str_without_spaces = replace_first_numeric_char(&str_without_spaces);
168    }
169
170    (str_words_amount, str_without_spaces)
171}
172
173/// Parse a simple description string
174pub fn parse_simple_description_string(node: &Node<'_, '_>) -> Option<String> {
175    if let Some(description) = node
176        .children()
177        .find(|x| x.tag_name() == (IANA_NAMESPACE, "description").into())
178    {
179        let mut desc_text = String::new();
180        let body = description.text().map(|txt| txt.trim().to_string());
181
182        if let Some(body) = body
183            && !body.trim().is_empty()
184        {
185            desc_text.push_str(body.trim());
186        }
187        let re = Regex::new(r"\[RFC(?<RFCNUM>\d+)]").unwrap();
188        let desc_text = re.replace(&desc_text, RfcLinkSwapper).to_string();
189        let re = Regex::new(r"(?<href>https?://(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,4}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))").unwrap();
190        let desc_text = re.replace(&desc_text, HttpLinkSwapper);
191        Some(desc_text.to_string())
192    } else {
193        None
194    }
195}