edifact_types/util/
mod.rs

1use nom::{
2    branch::alt,
3    bytes::complete::{escaped, is_not, tag},
4    character::complete::{newline, one_of},
5    combinator::opt,
6    multi::{many0, separated_list0},
7    sequence::{delimited, terminated},
8    IResult, Parser as _,
9};
10
/// Normalizes a numeric string slice without allocating.
///
/// Surrounding whitespace is trimmed and leading `'0'` characters are dropped.
/// A value made up solely of zeros collapses to a single `"0"`; an empty (or
/// whitespace-only) input yields an empty slice.
pub fn clean_num(input: &str) -> &str {
    let trimmed = input.trim();
    let without_zeros = trimmed.trim_start_matches('0');
    if without_zeros.is_empty() && !trimmed.is_empty() {
        // Everything was a zero: keep the last one so the result is still a digit.
        &trimmed[trimmed.len() - 1..]
    } else {
        without_zeros
    }
}
21
22pub fn parse_line<'a>(input: &'a str, segment_name: &str) -> IResult<&'a str, Vec<&'a str>> {
23    let tag_name = format!("{segment_name}+");
24    let (rest, vars) = delimited(
25        tag(tag_name.as_str()),
26        escaped(is_not("?'"), '?', one_of(r#":?+'"#)),
27        tag("'"),
28    ).parse(input)?;
29    let (_, vars) = crate::util::parse_plus_section(vars)?;
30    // empty lines (double '') should throw error
31    // also look for trailing newline and remove it
32    #[allow(unused_variables)]
33    let (rest, empty_line) = terminated(opt(tag("'")), many0(newline)).parse(rest)?;
34    #[cfg(feature = "logging")]
35    if empty_line.is_some() {
36        log::warn!("Found empty line (ends with '') -> ignored");
37    }
38
39    Ok((rest, vars))
40}
41
42pub fn parse_plus_section(input: &str) -> IResult<&str, Vec<&str>> {
43    let (rest, vars) = separated_list0(
44        tag("+"),
45        alt((escaped(is_not("?+"), '?', one_of(r#":?+'"#)), tag(""))),
46    ).parse(input)?;
47    Ok((rest, vars))
48}
49
50pub fn parse_colon_section(input: &str) -> IResult<&str, Vec<&str>> {
51    let (rest, vars) = separated_list0(
52        tag(":"),
53        alt((escaped(is_not("?:"), '?', one_of(r#":?+'"#)), tag(""))),
54    ).parse(input)?;
55    Ok((rest, vars))
56}
57
58pub trait Parser<I, O, E> {
59    fn parse(str: I) -> IResult<I, O>;
60}
61
/// Turns a doubly-borrowed string slice into an owned `String`.
///
/// The `&&str` parameter makes the function usable directly as a mapping
/// function over iterators that yield `&&str` (such as `slice.iter()` on a
/// `[&str]`).
pub fn unborrow_string(input: &&str) -> String {
    String::from(*input)
}
65
#[cfg(test)]
mod test {
    use super::*;
    use regex::Regex;
    use std::{collections::BTreeMap, fs};

    // Change these two constants to generate the structs for another message type.
    /// Directory (EDIFACT release) the message description is read from.
    const VERSION: &str = "d00b";
    /// Message type; used as file name and as prefix of generated struct names.
    const MSG_TYPE: &str = "COPARN";

    #[test]
    fn parse_line_test() {
        let input_str = r#"UNH+3757?'651?+IFTSTA?:D:0??0B:UN'"#;
        let (rest, line_vars) = parse_line(input_str, "UNH").unwrap();
        // Every `+` and `'` inside the segment is escaped, so it is one single element
        // and the release characters are kept verbatim.
        assert_eq!(line_vars, vec![r#"3757?'651?+IFTSTA?:D:0??0B:UN"#]);
        assert!(rest.is_empty());
    }

    #[test]
    fn parse_plus_section_test() {
        let input_str = r#"3757?'651?+IFTSTA?:D:0??0B:UN+123+hello?+??world+goodbye"#;
        let (rest, vars) = parse_plus_section(input_str).unwrap();
        assert_eq!(
            vars,
            vec![
                r#"3757?'651?+IFTSTA?:D:0??0B:UN"#,
                "123",
                "hello?+??world",
                "goodbye",
            ]
        );
        assert!(rest.is_empty());
    }

    #[test]
    fn parse_colon_section_test() {
        let input_str = r#"IFTSTA?:D:0??0B:UN"#;
        let (rest, vars) = parse_colon_section(input_str).unwrap();
        assert_eq!(vars, vec!["IFTSTA?:D", "0??0B", "UN"]);
        assert!(rest.is_empty());
    }

    /// Maps the requirement status (`M`/`C`) and repeat count of a description
    /// entry to the Rust type of the generated field.
    fn field_type(ty: &str, req: &str, repeat: &str) -> String {
        match (repeat, req) {
            ("1", "M") => ty.to_string(),
            ("1", _) => format!("Option<{ty}>"),
            _ => format!("Vec<{ty}>"),
        }
    }

    /// Builds the field declaration (`tag: Type,`) for a segment line, or `None`
    /// when `re` does not match the line.
    fn internal_line(i: &str, re: &Regex) -> Option<String> {
        let caps = re.captures(i)?;
        let (_, [segment, _name, req, repeat]) = caps.extract();
        Some(format!(
            "{}: {},",
            segment.to_lowercase(),
            field_type(segment, req, repeat)
        ))
    }

    /// Builds the field declaration and the struct name for a segment-group line,
    /// or `None` when `re` does not match the line.
    fn internal_group(i: &str, re: &Regex) -> Option<(String, String)> {
        let caps = re.captures(i)?;
        let (_, [name, req, repeat]) = caps.extract();
        let struct_name = format!("{MSG_TYPE}{}", name.replace(' ', ""));
        let handle = name.replace(' ', "_").to_lowercase();
        let field = format!("{handle}: {},", field_type(&struct_name, req, repeat));
        Some((field, struct_name))
    }

    /// Opening lines of a generated segment-group struct.
    fn group_header(name: &str) -> String {
        format!("#[derive(Debug, Serialize, Deserialize, DisplayEdifactSg, ParseSg)]\npub struct {name} {{")
    }

    /// Appends one `pub` field declaration to a struct body under construction.
    fn push_field(target: &mut String, field: &str) {
        target.push_str("\n    pub ");
        target.push_str(field);
    }

    /// Code generator disguised as a test: reads the message description at
    /// `edi_desc/{VERSION}/{MSG_TYPE}` and prints the Rust structs derived from it.
    ///
    /// Nesting is derived from the trailing characters of each line: with `depth`
    /// groups open, the last `depth` characters are inspected for `+` markers
    /// that open or close groups (presumably the branch diagram of the
    /// description format; verify against an actual description file).
    ///
    /// # Panics
    ///
    /// Panics when the description file cannot be read, or when a group marker
    /// is found while no group struct has been recorded for it.
    #[test]
    fn parse_edifact_descr_from_file() {
        let contents = fs::read_to_string(format!("edi_desc/{VERSION}/{MSG_TYPE}"))
            .expect("Should have been able to read the file");
        let re_line =
            Regex::new(r"^\d{4}.+([A-Z]{3})\s+((?:\S+ ){1,})\s+(M|C)\s+(\d{1,4})").unwrap();
        let re_group = Regex::new(r".*-+ (\S+ ?\S+ ?\S+?)\s+-+ (C|M)\s+(\d{1,4}).*").unwrap();
        println!("Input:\n\n");
        let mut message_struct: String = format!(
            "#[derive(Default, Debug, Serialize, Deserialize, DisplayEdifact, ParseMsg)]\npub struct {MSG_TYPE} {{"
        );

        // Struct name -> struct source; a BTreeMap keeps the output sorted by name.
        let mut groups: BTreeMap<String, String> = BTreeMap::new();
        // Number of segment groups that are currently open.
        let mut depth: usize = 0;
        // Struct names of the open groups that could be parsed, innermost last.
        let mut open_groups: Vec<String> = vec![];

        // `str::lines` handles both `\n` and `\r\n` line endings.
        for line in contents.lines() {
            // kick out non-parsable lines
            let parsed_line = internal_line(line, &re_line);
            let parsed_group = internal_group(line, &re_group);
            if parsed_line.is_none() && parsed_group.is_none() {
                continue;
            }

            // figure out how deeply nested we are
            let outer = depth.saturating_sub(1);
            let (outer_line, _) = line.split_at(line.len() - outer);
            let (inner_line, il_rest) = line.split_at(line.len() - depth);

            if depth == 0 {
                if outer_line.ends_with('+') {
                    // a top-level group starts here
                    println!("group ___new: {outer_line}");
                    if let Some((group_field, name)) = parsed_group {
                        push_field(&mut message_struct, &group_field);
                        groups.insert(name.clone(), group_header(&name));
                        open_groups.push(name);
                    }
                    depth += 1;
                } else if let Some(field) = parsed_line {
                    // plain segment outside of any group
                    println!("normal _____: {line}");
                    push_field(&mut message_struct, &field);
                }
            } else if inner_line.ends_with('+') {
                // we are in a group and are starting another
                println!("group _start: {inner_line}");
                if let Some((group_field, name)) = parsed_group {
                    let parent = open_groups
                        .last()
                        .expect("a nested group requires a recorded parent group");
                    if let Some(body) = groups.get_mut(parent) {
                        push_field(body, &group_field);
                    }
                    groups.insert(name.clone(), group_header(&name));
                    open_groups.push(name);
                }
                depth += 1;
            } else if outer_line.ends_with('+') {
                // last segment of the innermost group
                if let Some(field) = parsed_line {
                    println!("group ___end: {line}");
                    let current = open_groups
                        .last()
                        .expect("a group end requires a recorded open group");
                    if let Some(body) = groups.get_mut(current) {
                        push_field(body, &field);
                    }
                }
                // one line can close several groups: one per leading `+`
                let closing = il_rest.chars().take_while(|&c| c == '+').count();
                for _ in 0..closing {
                    let closed = open_groups
                        .pop()
                        .expect("a group end requires a recorded open group");
                    if let Some(body) = groups.get_mut(&closed) {
                        body.push_str("\n}\n");
                    }
                    depth -= 1;
                }
            } else {
                // inside group
                println!("group middle: {line}");
                if let Some(field) = parsed_line {
                    if let Some(current) = open_groups.last() {
                        if let Some(body) = groups.get_mut(current) {
                            push_field(body, &field);
                        }
                    } else {
                        println!("{line} -> this is inside a group, but not parsed")
                    }
                }
            }
        }

        let imports = format!(
            "use crate::{VERSION}::*;\n\
             use edifact_types_macros::{{DisplayEdifact, DisplayEdifactSg, ParseMsg, ParseSg}};\n\
             use serde::{{Deserialize, Serialize}};\n\
             use std::fmt;"
        );
        let mut final_string = format!("{imports}\n\n{message_struct}\n}}\n");
        for body in groups.values() {
            final_string.push('\n');
            final_string.push_str(body);
        }
        // Uncomment to write the generated module instead of only printing it:
        // fs::write(
        //     format!("src/{VERSION}/message/{}.rs", MSG_TYPE.to_lowercase()),
        //     final_string.clone(),
        // )
        // .unwrap();
        println!("Output:\n\n{final_string}");
    }
}