1use regex::Regex;
2use serde::{Deserialize, Serialize};
3use serde_json;
4
5#[derive(Debug, PartialEq, Serialize, Deserialize)]
7pub struct Include {
8 pub captured: String,
10}
11
12#[derive(Debug, PartialEq, Serialize, Deserialize)]
14pub struct Typedefs {
15 pub captured: String,
17}
18
19#[derive(Debug, Serialize, Deserialize)]
21pub struct StaticVariable {
22 pub captured: String,
24 pub name_expr: String,
26 pub name: String,
28 pub dtype: String,
30 pub is_local: bool,
32 pub func_name: String,
34 pub init: String,
36 pub array_size: i32,
38 pub is_const: bool,
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct Function {
45 pub captured: String,
47 pub name: String,
49 pub is_local: bool,
51 pub rtype: String,
53 pub args: String,
55 pub atypes: String,
57 pub anames: String,
59}
60
61#[derive(Debug, Serialize, Deserialize)]
63pub struct NestedCall {
64 pub callee: Function,
65 pub caller: Function,
66}
67
68#[derive(Debug, Serialize, Deserialize)]
70pub struct Parser {
71 pub json_object: serde_json::Value,
72 pub sourcename: String,
74 pub sourcedirname: String,
76 pub lsv_macro_name: String,
78 pub incs: Vec<Include>,
80 pub typedefs: Vec<Typedefs>,
82 pub static_vars: Vec<StaticVariable>,
84 pub fncs: Vec<Function>,
86 pub ncls: Vec<NestedCall>,
88 pub callees: Vec<Function>,
90}
91
92impl Parser {
93 pub fn parse(textdata: &str) -> Self {
96 let code = remove_comments(textdata);
97 let fncs = get_fncs(&code);
98 let ncls = get_ncls(&code, &fncs);
99 let callees: Vec<Function> = get_callees(&ncls);
100 let mut static_vars = get_static_vars(&code, &fncs);
101 let lsv_macro_name = "LOCAL_STATIC_VARIABLE".to_string();
102 update_static_vars_with_lsv(&code, &fncs, &lsv_macro_name, &mut static_vars);
103 Self {
104 json_object: serde_json::json!({}),
105 sourcename: String::new(),
106 sourcedirname: String::new(),
107 lsv_macro_name: lsv_macro_name,
108 incs: get_incs(&code),
109 typedefs: get_typedefs(&code),
110 static_vars: static_vars,
111 fncs: fncs.clone(),
112 ncls: ncls,
113 callees: callees,
114 }
115 }
116}
117
/// Returns `code` with C comments removed.
///
/// * `/* ... */` comments are deleted entirely (nothing is inserted in their place).
/// * `// ...` comments are deleted up to, but not including, the line feed.
/// * An unterminated `/*` is left in the output unchanged.
/// * Comment markers inside string or character literals are preserved, so text such as
///   `"http://example.com"` survives intact.
fn remove_comments(code: &str) -> String {
    let bytes = code.as_bytes();
    let mut out = String::with_capacity(code.len());
    // Start of the pending run of text that has to be copied to `out`.
    let mut kept_from = 0;
    let mut i = 0;

    // Every delimiter of interest is ASCII, so each index used for slicing below is a
    // valid UTF-8 character boundary.
    while i < bytes.len() {
        match bytes[i] {
            // String or character literal: skip to the closing quote.
            quote @ (b'"' | b'\'') => {
                i += 1;
                while i < bytes.len() {
                    match bytes[i] {
                        // Escape sequence: the next byte can never close the literal.
                        b'\\' => i += 2,
                        // A literal cannot span lines; stopping here keeps a stray quote
                        // (e.g. an apostrophe in a `#error` line) from hiding later comments.
                        b'\n' => break,
                        b if b == quote => {
                            i += 1;
                            break;
                        }
                        _ => i += 1,
                    }
                }
            }
            // Block comment.
            b'/' if bytes.get(i + 1) == Some(&b'*') => match code[i + 2..].find("*/") {
                Some(rel) => {
                    out.push_str(&code[kept_from..i]);
                    // Skip "/*", the comment text and "*/".
                    i += rel + 4;
                    kept_from = i;
                }
                // Unterminated: keep the text as it is.
                None => i += 1,
            },
            // Line comment: drop everything up to the line feed, which is kept.
            b'/' if bytes.get(i + 1) == Some(&b'/') => {
                out.push_str(&code[kept_from..i]);
                i = code[i..].find('\n').map_or(code.len(), |rel| i + rel);
                kept_from = i;
            }
            _ => i += 1,
        }
    }

    out.push_str(&code[kept_from..]);
    out
}
124
125fn get_incs(code: &str) -> Vec<Include> {
128 let mut result = vec![];
129 let re = Regex::new(r#"(?P<captured>#include[\s]+["<].+[">])"#).unwrap();
130 for cap in re.captures_iter(code) {
131 result.push(Include {
132 captured: cap.name("captured").unwrap().as_str().trim().to_string(),
133 });
134 }
135 result.dedup();
136 result
137}
138
139fn get_typedefs(code: &str) -> Vec<Typedefs> {
142 let mut result = vec![];
143 let re = Regex::new(r#"(?P<captured>typedef\s+(?:.*?\{[.\s\S]*?\}.*?;|[.\s\S]+?;))"#).unwrap();
144 for cap in re.captures_iter(code) {
145 result.push(Typedefs {
146 captured: cap.name("captured").unwrap().as_str().trim().to_string(),
147 });
148 }
149 result.dedup();
150 result
151}
152
153fn update_static_vars_with_lsv(
156 code: &str,
157 fncs: &Vec<Function>,
158 lsv_macro_name: &str,
159 static_vars: &mut Vec<StaticVariable>,
160) {
161 let regex_str = format!(
162 "{}\\((?<fnc_name>\\w+)\\s*,(?<dtype>.*?)\\s*,\\s*(?<name>\\w+)\\s*(?:\\[(?<array_size>.*?)\\])?\\s*,\\s*(?<value>.*?)\\).*?;",
163 &lsv_macro_name
164 );
165 let re = Regex::new(®ex_str).unwrap();
166 for cap in re.captures_iter(code) {
167 let captured = cap.get(0).unwrap().as_str().trim().to_string();
168 let dtype = cap.name("dtype").unwrap().as_str().trim().to_string();
169 let name = cap.name("name").unwrap().as_str().trim().to_string();
170 let array_size = cap
171 .name("array_size")
172 .map_or(0, |c| c.as_str().parse().unwrap_or(0));
173 let init = cap
174 .name("value")
175 .map_or("0", |c| c.as_str().trim())
176 .to_string();
177 let is_const = cap
178 .name("dtype")
179 .map_or(false, |c| c.as_str().to_lowercase().contains("const"));
180 let name_expr = cap.name("array_size").map_or(name.clone(), |c| {
181 (name.clone() + "[" + c.as_str().trim() + "]").to_string()
182 });
183 let mut is_local = false;
184 let mut func_name = String::from("");
185 for func in fncs {
186 if let Some(pos) = code.find(func.captured.as_str()) {
187 let start = pos + code.get(pos..).unwrap().find('{').unwrap() + 1;
188 let stop = find_end_of_func(code, start);
189 let body = code.get(start..stop).unwrap();
190 if body.contains(captured.as_str()) {
191 is_local = true;
192 func_name = func.name.to_string();
193 }
194 }
195 }
196 static_vars.push(StaticVariable {
197 captured: captured,
198 name_expr: name_expr,
199 name: name,
200 dtype: dtype,
201 is_local: is_local,
202 func_name: func_name,
203 init: init,
204 array_size: array_size,
205 is_const: is_const,
206 });
207 }
208}
209
210fn get_static_vars(code: &str, fncs: &Vec<Function>) -> Vec<StaticVariable> {
213 let mut result = vec![];
214 let re = Regex::new(r"(?i)(?<keyword>static\s+|static\s+const\s+|const\s+static\s+)+(?<dtype>.*?)(?<name>\w+)\s*(?:\[(?<array_size>.*?)\])?\s*(?:=\s*(?<value>\{.*?\}|.*?))?;").unwrap();
215 for cap in re.captures_iter(code) {
216 let captured = cap.get(0).unwrap().as_str().trim().to_string();
217 let dtype = cap.name("dtype").unwrap().as_str().trim().to_string();
218 let name = cap.name("name").unwrap().as_str().trim().to_string();
219 let array_size = cap
220 .name("array_size")
221 .map_or(0, |c| c.as_str().parse().unwrap_or(0));
222 let init = cap
223 .name("value")
224 .map_or("0", |c| c.as_str().trim())
225 .to_string();
226 let is_const = cap
227 .name("keyword")
228 .map_or(false, |c| c.as_str().to_lowercase().contains("const"));
229 let name_expr = cap.name("array_size").map_or(name.clone(), |c| {
230 (name.clone() + "[" + c.as_str().trim() + "]").to_string()
231 });
232 let mut is_local = false;
233 let mut func_name = String::from("");
234 for func in fncs {
235 if let Some(pos) = code.find(func.captured.as_str()) {
236 let start = pos + code.get(pos..).unwrap().find('{').unwrap() + 1;
237 let stop = find_end_of_func(code, start);
238 let body = code.get(start..stop).unwrap();
239 if body.contains(captured.as_str()) {
240 is_local = true;
241 func_name = func.name.to_string();
242 }
243 }
244 }
245 result.push(StaticVariable {
246 captured: captured,
247 name_expr: name_expr,
248 name: name,
249 dtype: dtype,
250 is_local: is_local,
251 func_name: func_name,
252 init: init,
253 array_size: array_size,
254 is_const: is_const,
255 });
256 }
257 result
258}
259
260fn get_fncs(code: &str) -> Vec<Function> {
263 let mut result = vec![];
264 let re = Regex::new(
265 r"((?<return>\w+[\w\s\*]*\s+)|FUNC\((?<return_ex>[^,]+),[^\)]+\)\s*)(?<name>\w+)[\w]*\s*\((?<args>[^=!><>;\(\)-]*)\)\s*\{"
266 ).unwrap();
267 let get_atypes = |args: String| -> (String, String) {
268 let mut type_list = String::new();
269 let mut name_list = String::new();
270 let mut first_pos = true;
271 let arg_list = args.split(',').collect::<Vec<&str>>();
272 for arg in arg_list {
273 let arg = arg.trim();
274 let re4sep = Regex::new(r"^(?<atype>.*?)(?<aname>\w+(?:\[.*?\])*)$").unwrap();
275 let mut atype;
276 let mut aname;
277 if let Some(cap) = re4sep.captures(&arg) {
278 atype = cap.name("atype").unwrap().as_str().trim().to_string();
279 aname = cap.name("aname").unwrap().as_str().trim().to_string();
280 let re4const = Regex::new(r"\w[\s\r\n]+const[\s\r\n]*\*").unwrap();
282 if let Some(_) = re4const.captures(&atype) {
283 atype = atype.replace("const", "");
284 atype = format!("const {}", atype);
285 let re4space = Regex::new(r"\s+").unwrap();
286 atype = re4space.replace_all(&atype, " ").to_string();
287 }
288 if first_pos {
289 first_pos = false;
290 } else {
291 type_list.push_str(", ");
292 name_list.push_str(", ");
293 }
294 type_list.push_str(&atype);
295 let array_dimension = aname.matches("[").count();
296 type_list.push_str(&"*".repeat(array_dimension));
297 let re4bracket = Regex::new(r"(\[.*?\])+").unwrap();
299 if let Some(_) = re4bracket.captures(&aname) {
300 aname = re4bracket.replace_all(&aname, "").to_string();
301 }
302 name_list.push_str(&aname);
303 }
304 }
305 if type_list.trim() == "void" {
306 type_list.clear();
307 name_list.clear();
308 }
309 (type_list, name_list)
310 };
311 for cap in re.captures_iter(code) {
312 if cap.name("name").unwrap().as_str().trim() == "if" {
313 continue;
314 }
315 let re4space = Regex::new(r"\s+").unwrap();
316 let mut raw_args = re4space
317 .replace_all(cap.name("args").unwrap().as_str().trim(), " ")
318 .replace("\\", "")
319 .trim()
320 .to_string();
321 if raw_args.trim() == "void" {
322 raw_args.clear();
323 }
324 let (atypes, anames) = get_atypes(raw_args.clone());
325 let rtype = cap
326 .name("return")
327 .or(cap.name("return_ex"))
328 .unwrap()
329 .as_str()
330 .replace("static", "")
331 .replace("STATIC", "")
332 .replace("inline", "")
333 .replace("INLINE", "")
334 .trim()
335 .to_string();
336 result.push(Function {
337 captured: cap.get(0).unwrap().as_str().trim().to_string(),
338 name: cap.name("name").unwrap().as_str().trim().to_string(),
339 is_local: cap
340 .get(0)
341 .unwrap()
342 .as_str()
343 .to_ascii_lowercase()
344 .contains("static"),
345 rtype: rtype,
346 args: raw_args.clone(),
347 atypes: atypes,
348 anames: anames,
349 });
350 }
351 result
352}
353
/// Returns the byte offset in `code` of the `}` that closes the block whose opening `{`
/// lies immediately before `start`.
///
/// `start` is the byte offset of the first character after the opening brace. Nested
/// braces are balanced. When no matching `}` exists, or `start` is not a valid offset into
/// `code`, `start` itself is returned, so `code.get(start..result)` then yields an empty
/// body or `None`.
///
/// Offsets are counted in bytes, so the result can be used to slice `code` even when the
/// text contains multi-byte characters.
fn find_end_of_func(code: &str, start: usize) -> usize {
    let rest = match code.get(start..) {
        Some(rest) => rest,
        None => return start,
    };

    // The block that begins just before `start` is already open.
    let mut depth: usize = 1;
    for (offset, ch) in rest.char_indices() {
        match ch {
            '{' => depth += 1,
            '}' => {
                depth -= 1;
                if depth == 0 {
                    return start + offset;
                }
            }
            _ => {}
        }
    }
    start
}
372
373fn get_ncls(code: &str, fncs: &Vec<Function>) -> Vec<NestedCall> {
376 let mut result = vec![];
377 for caller in fncs {
378 if let Some(pos) = code.find(caller.captured.as_str()) {
379 let start = pos + code.get(pos..).unwrap().find('{').unwrap() + 1;
380 let stop = find_end_of_func(code, start);
381 let body = code.get(start..stop).unwrap();
382 for callee in fncs {
383 let call_str = format!("{}(", callee.name);
384 if body.contains(call_str.as_str()) {
385 result.push(NestedCall {
386 callee: callee.clone(),
387 caller: caller.clone(),
388 });
389 }
390 }
391 }
392 }
393 result
394}
395
396fn find_func_in_list(funcname: &str, fncs: &Vec<Function>) -> bool {
397 let mut result = false;
398 for fnc in fncs {
399 if funcname == fnc.name {
400 result = true;
401 break;
402 }
403 }
404 result
405}
406
407fn get_callees(ncls: &Vec<NestedCall>) -> Vec<Function> {
408 let mut result: Vec<Function> = vec![];
409 for ncl in ncls {
410 if !find_func_in_list(&ncl.callee.name, &result) {
411 result.push(ncl.callee.clone());
412 }
413 }
414 result
415}
416
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Small C translation unit exercising includes (with a duplicate), comments, a file
    /// scope static, a function-local static array and a call between two functions.
    static TEST_CODE: &str = "\
#include <stdio.h>
#include <stdio.h>
#include \"test.h\"

int global_var = 2;
static char static_var;

// test-comment1
/* test-comment2 */
/*
    test-comment3
*/

static inline char local_function(int a);

void main()
{
    char c = local_function(20);
    if (c == 1)
    {
        printf(\"no operation\");
    }
    else
    {
        printf(\"hello world! %c\\n\", c);
    }
}

static inline char local_function(int a,
                                  int*b )
{
    static int local_var[10];
    return (char)a;
}
";

    /// Smoke test on the sample file shipped with the project; only checks that parsing
    /// completes and prints the result for inspection.
    #[test]
    fn test_parse() {
        let code = fs::read_to_string("./example/source/sample.c")
            .expect("sample source ./example/source/sample.c should be readable");
        let parser = Parser::parse(&code);
        println!("{:#?}", parser);
    }

    #[test]
    fn test_remove_comments() {
        let clean_code = remove_comments(TEST_CODE);
        assert!(!clean_code.contains("test-comment"));
    }

    #[test]
    fn test_get_incs() {
        let list_incs = get_incs(TEST_CODE);
        // The repeated <stdio.h> line is reported once.
        assert_eq!(list_incs.len(), 2);
        assert_eq!(list_incs[0].captured, "#include <stdio.h>");
        assert_eq!(list_incs[1].captured, "#include \"test.h\"");
    }

    #[test]
    fn test_get_static_vars() {
        let list_fncs = get_fncs(TEST_CODE);
        let list_static_vars = get_static_vars(TEST_CODE, &list_fncs);
        assert_eq!(list_static_vars[0].name, "static_var");
        assert_eq!(list_static_vars[0].dtype, "char");
        assert!(!list_static_vars[0].is_local);
        assert_eq!(list_static_vars[1].name, "local_var");
        assert_eq!(list_static_vars[1].name_expr, "local_var[10]");
        assert_eq!(list_static_vars[1].dtype, "int");
        assert!(list_static_vars[1].is_local);
        assert_eq!(list_static_vars[1].func_name, "local_function");
    }

    #[test]
    fn test_get_fncs() {
        let list_fncs = get_fncs(TEST_CODE);
        assert_eq!(list_fncs[0].name, "main");
        assert_eq!(list_fncs[0].rtype, "void");
        assert!(!list_fncs[0].is_local);
        assert_eq!(list_fncs[1].name, "local_function");
        assert_eq!(list_fncs[1].rtype, "char");
        assert_eq!(list_fncs[1].atypes, "int, int*");
        assert!(list_fncs[1].is_local);
    }

    #[test]
    fn test_get_ncls() {
        let list_fncs = get_fncs(TEST_CODE);
        let list_ncls = get_ncls(TEST_CODE, &list_fncs);
        assert!(
            !list_ncls.is_empty(),
            "main calls local_function, so at least one nested call is expected"
        );
        assert_eq!(list_ncls[0].caller.name, "main");
        assert_eq!(list_ncls[0].callee.name, "local_function");
    }
}