Skip to main content

cyagen/
parser.rs

1use regex::Regex;
2use serde::{Deserialize, Serialize};
3use serde_json;
4
5/// identifiers in the list of "include"
6#[derive(Debug, PartialEq, Serialize, Deserialize)]
7pub struct Include {
8    /// captured string by regex; ex) "#include <stdio.h>"
9    pub captured: String,
10}
11
12/// identifiers in the list of "typedef"
13#[derive(Debug, PartialEq, Serialize, Deserialize)]
14pub struct Typedefs {
15    /// captured string by regex; ex) "typedef enum\n{\n    Idle = 0,\n    Forward,\n    TurnLeft,\n    TurnRight,\n    MaxDirection\n} Direction_t;"
16    pub captured: String,
17}
18
19/// identifiers in the list of "static_vars"
20#[derive(Debug, Serialize, Deserialize)]
21pub struct StaticVariable {
22    /// captured string by regex; ex) "static int array_var\[10\] = { 1, 2, 3 };"
23    pub captured: String,
24    /// variable name expression; ex) "array_var\[10\]"
25    pub name_expr: String,
26    /// variable name; ex) "array_var"
27    pub name: String,
28    /// data type; ex) "int"
29    pub dtype: String,
30    /// true if the variable is declared within a function
31    pub is_local: bool,
32    /// function where the variable is declared
33    pub func_name: String,
34    /// init value; ex) "{ 1, 2, 3 }"
35    pub init: String,
36    /// array size; ex) "10"
37    pub array_size: i32,
38    /// true if the variable is const
39    pub is_const: bool,
40}
41
42/// identifiers in the list of "fncs"
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct Function {
45    /// captured string by regex; ex) "void setDir(const Direction_t dir)\n{"
46    pub captured: String,
47    /// name; ex) "setDir"
48    pub name: String,
49    /// true if the function is static; ex) false
50    pub is_local: bool,
51    /// return data type; ex) "void"
52    pub rtype: String,
53    /// arguments; ex) "const Direction_t dir"
54    pub args: String,
55    /// argument data types; ex) "const Direction_t"
56    pub atypes: String,
57    /// argument names; ex) "dir"
58    pub anames: String,
59}
60
61/// identifiers in the list of "ncls"
62#[derive(Debug, Serialize, Deserialize)]
63pub struct NestedCall {
64    pub callee: Function,
65    pub caller: Function,
66}
67
68/// identifiers which can be used in template
69#[derive(Debug, Serialize, Deserialize)]
70pub struct Parser {
71    pub json_object: serde_json::Value,
72    /// source filename without extension to be parsed
73    pub sourcename: String,
74    /// source file folder
75    pub sourcedirname: String,
76    /// local static variable macro name; LOCAL_STATIC_VARIABLE
77    pub lsv_macro_name: String,
78    /// list of "include"
79    pub incs: Vec<Include>,
80    /// list of "typedef"
81    pub typedefs: Vec<Typedefs>,
82    /// list of static variables
83    pub static_vars: Vec<StaticVariable>,
84    /// list of functions
85    pub fncs: Vec<Function>,
86    /// list of nested calls
87    pub ncls: Vec<NestedCall>,
88    /// list of functions called within the source file
89    pub callees: Vec<Function>,
90}
91
92impl Parser {
93    /// parse the given textdata and return Parse object to be used for generator
94    ///
95    pub fn parse(textdata: &str) -> Self {
96        let code = remove_comments(textdata);
97        let fncs = get_fncs(&code);
98        let ncls = get_ncls(&code, &fncs);
99        let callees: Vec<Function> = get_callees(&ncls);
100        let mut static_vars = get_static_vars(&code, &fncs);
101        let lsv_macro_name = "LOCAL_STATIC_VARIABLE".to_string();
102        update_static_vars_with_lsv(&code, &fncs, &lsv_macro_name, &mut static_vars);
103        Self {
104            json_object: serde_json::json!({}),
105            sourcename: String::new(),
106            sourcedirname: String::new(),
107            lsv_macro_name: lsv_macro_name,
108            incs: get_incs(&code),
109            typedefs: get_typedefs(&code),
110            static_vars: static_vars,
111            fncs: fncs.clone(),
112            ncls: ncls,
113            callees: callees,
114        }
115    }
116}
117
/// Remove comments from C source code.
///
/// * `// ...` comments are dropped up to (not including) the line break.
/// * `/* ... */` comments are dropped entirely and replaced by nothing; an
///   unterminated `/*` is left in place.
/// * Text inside string and character literals is copied verbatim, so
///   `"http://example.com"` or `"/* not a comment */"` survive unchanged.
///
/// A literal that is not closed on its own line ends at the line break, which
/// keeps a stray quote from swallowing the rest of the file.
fn remove_comments(code: &str) -> String {
    /// Byte length of the literal that starts at the beginning of `text` with `quote`.
    fn skip_c_literal(text: &str, quote: char) -> usize {
        let mut escaped = false;
        for (idx, ch) in text.char_indices().skip(1) {
            if escaped {
                escaped = false;
                continue;
            }
            if ch == '\\' {
                escaped = true;
            } else if ch == '\n' {
                return idx;
            } else if ch == quote {
                return idx + ch.len_utf8();
            }
        }
        text.len()
    }

    let mut out = String::with_capacity(code.len());
    let mut rest = code;
    // Once one search for "*/" fails, no later "/*" can be closed either.
    let mut block_closable = true;
    while let Some(ch) = rest.chars().next() {
        if rest.starts_with("//") {
            let end = rest.find('\n').unwrap_or(rest.len());
            rest = &rest[end..];
        } else if block_closable && rest.starts_with("/*") {
            match rest[2..].find("*/") {
                // `close` is relative to the text after "/*"; skip "/*", the body and "*/".
                Some(close) => rest = &rest[close + 4..],
                // Unterminated: the next iteration copies the '/' as ordinary text.
                None => block_closable = false,
            }
        } else if ch == '"' || ch == '\'' {
            let end = skip_c_literal(rest, ch);
            out.push_str(&rest[..end]);
            rest = &rest[end..];
        } else {
            out.push(ch);
            rest = &rest[ch.len_utf8()..];
        }
    }
    out
}
124
125/// list of inclusion from C source code
126///
127fn get_incs(code: &str) -> Vec<Include> {
128    let mut result = vec![];
129    let re = Regex::new(r#"(?P<captured>#include[\s]+["<].+[">])"#).unwrap();
130    for cap in re.captures_iter(code) {
131        result.push(Include {
132            captured: cap.name("captured").unwrap().as_str().trim().to_string(),
133        });
134    }
135    result.dedup();
136    result
137}
138
139/// list of typedef from C source code
140///
141fn get_typedefs(code: &str) -> Vec<Typedefs> {
142    let mut result = vec![];
143    let re = Regex::new(r#"(?P<captured>typedef\s+(?:.*?\{[.\s\S]*?\}.*?;|[.\s\S]+?;))"#).unwrap();
144    for cap in re.captures_iter(code) {
145        result.push(Typedefs {
146            captured: cap.name("captured").unwrap().as_str().trim().to_string(),
147        });
148    }
149    result.dedup();
150    result
151}
152
153/// update the list of static variables with LOCAL_STATIC_VARIABLE string pattern
154///
155fn update_static_vars_with_lsv(
156    code: &str,
157    fncs: &Vec<Function>,
158    lsv_macro_name: &str,
159    static_vars: &mut Vec<StaticVariable>,
160) {
161    let regex_str = format!(
162        "{}\\((?<fnc_name>\\w+)\\s*,(?<dtype>.*?)\\s*,\\s*(?<name>\\w+)\\s*(?:\\[(?<array_size>.*?)\\])?\\s*,\\s*(?<value>.*?)\\).*?;",
163        &lsv_macro_name
164    );
165    let re = Regex::new(&regex_str).unwrap();
166    for cap in re.captures_iter(code) {
167        let captured = cap.get(0).unwrap().as_str().trim().to_string();
168        let dtype = cap.name("dtype").unwrap().as_str().trim().to_string();
169        let name = cap.name("name").unwrap().as_str().trim().to_string();
170        let array_size = cap
171            .name("array_size")
172            .map_or(0, |c| c.as_str().parse().unwrap_or(0));
173        let init = cap
174            .name("value")
175            .map_or("0", |c| c.as_str().trim())
176            .to_string();
177        let is_const = cap
178            .name("dtype")
179            .map_or(false, |c| c.as_str().to_lowercase().contains("const"));
180        let name_expr = cap.name("array_size").map_or(name.clone(), |c| {
181            (name.clone() + "[" + c.as_str().trim() + "]").to_string()
182        });
183        let mut is_local = false;
184        let mut func_name = String::from("");
185        for func in fncs {
186            if let Some(pos) = code.find(func.captured.as_str()) {
187                let start = pos + code.get(pos..).unwrap().find('{').unwrap() + 1;
188                let stop = find_end_of_func(code, start);
189                let body = code.get(start..stop).unwrap();
190                if body.contains(captured.as_str()) {
191                    is_local = true;
192                    func_name = func.name.to_string();
193                }
194            }
195        }
196        static_vars.push(StaticVariable {
197            captured: captured,
198            name_expr: name_expr,
199            name: name,
200            dtype: dtype,
201            is_local: is_local,
202            func_name: func_name,
203            init: init,
204            array_size: array_size,
205            is_const: is_const,
206        });
207    }
208}
209
210/// list of static variables from C source code
211///
212fn get_static_vars(code: &str, fncs: &Vec<Function>) -> Vec<StaticVariable> {
213    let mut result = vec![];
214    let re = Regex::new(r"(?i)(?<keyword>static\s+|static\s+const\s+|const\s+static\s+)+(?<dtype>.*?)(?<name>\w+)\s*(?:\[(?<array_size>.*?)\])?\s*(?:=\s*(?<value>\{.*?\}|.*?))?;").unwrap();
215    for cap in re.captures_iter(code) {
216        let captured = cap.get(0).unwrap().as_str().trim().to_string();
217        let dtype = cap.name("dtype").unwrap().as_str().trim().to_string();
218        let name = cap.name("name").unwrap().as_str().trim().to_string();
219        let array_size = cap
220            .name("array_size")
221            .map_or(0, |c| c.as_str().parse().unwrap_or(0));
222        let init = cap
223            .name("value")
224            .map_or("0", |c| c.as_str().trim())
225            .to_string();
226        let is_const = cap
227            .name("keyword")
228            .map_or(false, |c| c.as_str().to_lowercase().contains("const"));
229        let name_expr = cap.name("array_size").map_or(name.clone(), |c| {
230            (name.clone() + "[" + c.as_str().trim() + "]").to_string()
231        });
232        let mut is_local = false;
233        let mut func_name = String::from("");
234        for func in fncs {
235            if let Some(pos) = code.find(func.captured.as_str()) {
236                let start = pos + code.get(pos..).unwrap().find('{').unwrap() + 1;
237                let stop = find_end_of_func(code, start);
238                let body = code.get(start..stop).unwrap();
239                if body.contains(captured.as_str()) {
240                    is_local = true;
241                    func_name = func.name.to_string();
242                }
243            }
244        }
245        result.push(StaticVariable {
246            captured: captured,
247            name_expr: name_expr,
248            name: name,
249            dtype: dtype,
250            is_local: is_local,
251            func_name: func_name,
252            init: init,
253            array_size: array_size,
254            is_const: is_const,
255        });
256    }
257    result
258}
259
260/// list of functions from C source code
261///
262fn get_fncs(code: &str) -> Vec<Function> {
263    let mut result = vec![];
264    let re = Regex::new(
265        r"((?<return>\w+[\w\s\*]*\s+)|FUNC\((?<return_ex>[^,]+),[^\)]+\)\s*)(?<name>\w+)[\w]*\s*\((?<args>[^=!><>;\(\)-]*)\)\s*\{"
266    ).unwrap();
267    let get_atypes = |args: String| -> (String, String) {
268        let mut type_list = String::new();
269        let mut name_list = String::new();
270        let mut first_pos = true;
271        let arg_list = args.split(',').collect::<Vec<&str>>();
272        for arg in arg_list {
273            let arg = arg.trim();
274            let re4sep = Regex::new(r"^(?<atype>.*?)(?<aname>\w+(?:\[.*?\])*)$").unwrap();
275            let mut atype;
276            let mut aname;
277            if let Some(cap) = re4sep.captures(&arg) {
278                atype = cap.name("atype").unwrap().as_str().trim().to_string();
279                aname = cap.name("aname").unwrap().as_str().trim().to_string();
280                // relocate 'const' only for 'datatype const *' -> 'const datatype *'
281                let re4const = Regex::new(r"\w[\s\r\n]+const[\s\r\n]*\*").unwrap();
282                if let Some(_) = re4const.captures(&atype) {
283                    atype = atype.replace("const", "");
284                    atype = format!("const {}", atype);
285                    let re4space = Regex::new(r"\s+").unwrap();
286                    atype = re4space.replace_all(&atype, " ").to_string();
287                }
288                if first_pos {
289                    first_pos = false;
290                } else {
291                    type_list.push_str(", ");
292                    name_list.push_str(", ");
293                }
294                type_list.push_str(&atype);
295                let array_dimension = aname.matches("[").count();
296                type_list.push_str(&"*".repeat(array_dimension));
297                // remove '[]' from name string
298                let re4bracket = Regex::new(r"(\[.*?\])+").unwrap();
299                if let Some(_) = re4bracket.captures(&aname) {
300                    aname = re4bracket.replace_all(&aname, "").to_string();
301                }
302                name_list.push_str(&aname);
303            }
304        }
305        if type_list.trim() == "void" {
306            type_list.clear();
307            name_list.clear();
308        }
309        (type_list, name_list)
310    };
311    for cap in re.captures_iter(code) {
312        if cap.name("name").unwrap().as_str().trim() == "if" {
313            continue;
314        }
315        let re4space = Regex::new(r"\s+").unwrap();
316        let mut raw_args = re4space
317            .replace_all(cap.name("args").unwrap().as_str().trim(), " ")
318            .replace("\\", "")
319            .trim()
320            .to_string();
321        if raw_args.trim() == "void" {
322            raw_args.clear();
323        }
324        let (atypes, anames) = get_atypes(raw_args.clone());
325        let rtype = cap
326            .name("return")
327            .or(cap.name("return_ex"))
328            .unwrap()
329            .as_str()
330            .replace("static", "")
331            .replace("STATIC", "")
332            .replace("inline", "")
333            .replace("INLINE", "")
334            .trim()
335            .to_string();
336        result.push(Function {
337            captured: cap.get(0).unwrap().as_str().trim().to_string(),
338            name: cap.name("name").unwrap().as_str().trim().to_string(),
339            is_local: cap
340                .get(0)
341                .unwrap()
342                .as_str()
343                .to_ascii_lowercase()
344                .contains("static"),
345            rtype: rtype,
346            args: raw_args.clone(),
347            atypes: atypes,
348            anames: anames,
349        });
350    }
351    result
352}
353
/// Find the end of a function body.
///
/// `start` is the byte offset just after the body's opening brace. The
/// return value is the byte offset of the matching closing brace, so
/// `&code[start..stop]` is the body text. When the braces never balance,
/// `start` itself is returned (an empty body).
///
/// Braces are counted literally; braces inside string or character literals
/// are not recognised as such.
///
/// # Panics
///
/// Panics when `start` is past the end of `code` or not on a char boundary.
fn find_end_of_func(code: &str, start: usize) -> usize {
    let tail = code
        .get(start..)
        .expect("start must be a char boundary within code");
    let mut open = 1;
    // `char_indices` yields byte offsets, which stay valid for slicing `code`
    // even when the body contains multi-byte characters.
    for (offset, c) in tail.char_indices() {
        match c {
            '}' => open -= 1,
            '{' => open += 1,
            _ => {}
        }
        if open == 0 {
            return start + offset;
        }
    }
    start
}
372
373/// list of ncls in C source
374///
375fn get_ncls(code: &str, fncs: &Vec<Function>) -> Vec<NestedCall> {
376    let mut result = vec![];
377    for caller in fncs {
378        if let Some(pos) = code.find(caller.captured.as_str()) {
379            let start = pos + code.get(pos..).unwrap().find('{').unwrap() + 1;
380            let stop = find_end_of_func(code, start);
381            let body = code.get(start..stop).unwrap();
382            for callee in fncs {
383                let call_str = format!("{}(", callee.name);
384                if body.contains(call_str.as_str()) {
385                    result.push(NestedCall {
386                        callee: callee.clone(),
387                        caller: caller.clone(),
388                    });
389                }
390            }
391        }
392    }
393    result
394}
395
396fn find_func_in_list(funcname: &str, fncs: &Vec<Function>) -> bool {
397    let mut result = false;
398    for fnc in fncs {
399        if funcname == fnc.name {
400            result = true;
401            break;
402        }
403    }
404    result
405}
406
407fn get_callees(ncls: &Vec<NestedCall>) -> Vec<Function> {
408    let mut result: Vec<Function> = vec![];
409    for ncl in ncls {
410        if !find_func_in_list(&ncl.callee.name, &result) {
411            result.push(ncl.callee.clone());
412        }
413    }
414    result
415}
416
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Small C source shared by the unit tests: a duplicated include, one
    /// global and one function-local static variable, three comment styles,
    /// and `main` calling a static inline function defined after it.
    static TEST_CODE: &str = "\
#include <stdio.h>
#include <stdio.h>
#include \"test.h\"

int global_var = 2;
static char static_var;

// test-comment1
/* test-comment2 */
/* 
    test-comment3
*/

static inline char local_function(int a);

void main()
{
    char c = local_function(20);
    if (c == 1)
    {
        printf(\"no operation\");
    }
    else
    {
        printf(\"hello world! %c\n\", c);
    }
}

static inline char local_function(int a, 
    int*b )
{
    static int local_var[10];
    return (char)a;
}
";

    /// Parses the sample file shipped with the project and prints the result.
    #[test]
    fn test_parse() {
        let source = fs::read_to_string("./example/source/sample.c").unwrap();
        println!("{:#?}", Parser::parse(&source));
    }

    #[test]
    fn test_remove_comments() {
        let stripped = remove_comments(TEST_CODE);
        assert!(!stripped.contains("test-comment"));
    }

    #[test]
    fn test_get_incs() {
        let incs = get_incs(TEST_CODE);
        assert_eq!(incs[0].captured, "#include <stdio.h>");
        assert_eq!(incs[1].captured, "#include \"test.h\"");
    }

    #[test]
    fn test_get_static_vars() {
        let fncs = get_fncs(TEST_CODE);
        let vars = get_static_vars(TEST_CODE, &fncs);

        let global = &vars[0];
        assert_eq!(global.name, "static_var");
        assert_eq!(global.dtype, "char");
        assert!(!global.is_local);

        let local = &vars[1];
        assert_eq!(local.name, "local_var");
        assert_eq!(local.name_expr, "local_var[10]");
        assert_eq!(local.dtype, "int");
        assert!(local.is_local);
        assert_eq!(local.func_name, "local_function");
    }

    #[test]
    fn test_get_fncs() {
        let fncs = get_fncs(TEST_CODE);

        let main_fn = &fncs[0];
        assert_eq!(main_fn.name, "main");
        assert_eq!(main_fn.rtype, "void");
        assert!(!main_fn.is_local);

        let local_fn = &fncs[1];
        assert_eq!(local_fn.name, "local_function");
        assert_eq!(local_fn.rtype, "char");
        assert_eq!(local_fn.atypes, "int, int*");
        assert!(local_fn.is_local);
    }

    #[test]
    fn test_get_ncls() {
        let fncs = get_fncs(TEST_CODE);
        let ncls = get_ncls(TEST_CODE, &fncs);
        assert!(!ncls.is_empty());
        assert_eq!(ncls[0].caller.name, "main");
        assert_eq!(ncls[0].callee.name, "local_function");
    }
}