Skip to main content

rust_code_analysis_code_split/
ops.rs

1use std::collections::HashSet;
2use std::path::{Path, PathBuf};
3
4use serde::Serialize;
5
6use crate::checker::Checker;
7use crate::getter::Getter;
8use crate::node::Node;
9use crate::spaces::SpaceKind;
10
11use crate::halstead::{Halstead, HalsteadMaps};
12
13use crate::dump_ops::*;
14use crate::traits::*;
15
16/// All operands and operators of a space.
17#[derive(Debug, Clone, Serialize)]
18pub struct Ops {
19    /// The name of a function space.
20    ///
21    /// If `None`, an error is occurred in parsing
22    /// the name of a function space.
23    pub name: Option<String>,
24    /// The first line of a function space.
25    pub start_line: usize,
26    /// The last line of a function space.
27    pub end_line: usize,
28    /// The space kind.
29    pub kind: SpaceKind,
30    /// All subspaces contained in a function space.
31    pub spaces: Vec<Ops>,
32    /// All operands of a space.
33    pub operands: Vec<String>,
34    /// All operators of a space.
35    pub operators: Vec<String>,
36}
37
38impl Ops {
39    fn new<T: Getter>(node: &Node, code: &[u8], kind: SpaceKind) -> Self {
40        let (start_position, end_position) = match kind {
41            SpaceKind::Unit => {
42                if node.child_count() == 0 {
43                    (0, 0)
44                } else {
45                    (node.start_row() + 1, node.end_row())
46                }
47            }
48            _ => (node.start_row() + 1, node.end_row() + 1),
49        };
50        Self {
51            name: T::get_func_space_name(node, code).map(|name| name.to_string()),
52            spaces: Vec::new(),
53            kind,
54            start_line: start_position,
55            end_line: end_position,
56            operators: Vec::new(),
57            operands: Vec::new(),
58        }
59    }
60
61    pub(crate) fn merge_ops(&mut self, other: &Ops) {
62        self.operands.extend_from_slice(&other.operands);
63        self.operators.extend_from_slice(&other.operators);
64    }
65}
66
67#[derive(Debug, Clone)]
68struct State<'a> {
69    ops: Ops,
70    halstead_maps: HalsteadMaps<'a>,
71    primitive_types: HashSet<String>,
72}
73
74fn compute_operators_and_operands<T: ParserTrait>(state: &mut State) {
75    state.ops.operators = state
76        .halstead_maps
77        .operators
78        .keys()
79        .filter(|k| !T::Checker::is_primitive(**k))
80        .map(|k| T::Getter::get_operator_id_as_str(*k).to_owned())
81        .collect();
82
83    // Add primitive types to operators
84    let v: Vec<_> = state.primitive_types.iter().cloned().collect();
85    state.ops.operators.extend_from_slice(&v);
86    println!("{:?}", state.ops.operators);
87    println!("{:?}", state.halstead_maps.operators);
88
89    state.ops.operands = state
90        .halstead_maps
91        .operands
92        .keys()
93        .map(|k| String::from_utf8(k.to_vec()).unwrap_or_else(|_| String::from("wrong_operands")))
94        .collect();
95}
96
97fn finalize<T: ParserTrait>(state_stack: &mut Vec<State>, diff_level: usize) {
98    if state_stack.is_empty() {
99        return;
100    }
101
102    // If there is only the unit space
103    if state_stack.len() == 1 {
104        let last_state = state_stack.last_mut().unwrap();
105        // Compute last_state operators and operands
106        compute_operators_and_operands::<T>(last_state);
107    }
108
109    for _ in 0..diff_level {
110        if state_stack.len() == 1 {
111            break;
112        } else {
113            let mut state = state_stack.pop().unwrap();
114            let last_state = state_stack.last_mut().unwrap();
115
116            // Compute state operators and operands
117            compute_operators_and_operands::<T>(&mut state);
118
119            // Compute last_state operators and operands
120            compute_operators_and_operands::<T>(last_state);
121
122            // Merge Halstead maps
123            last_state.halstead_maps.merge(&state.halstead_maps);
124
125            // Merge operands and operators between spaces
126            last_state.ops.merge_ops(&state.ops);
127            last_state.ops.spaces.push(state.ops);
128        }
129    }
130}
131
132/// Retrieves all the operators and operands of a code.
133///
134/// If `None`, it was not possible to retrieve the operators and operands
135/// of a code.
136///
137/// # Examples
138///
139/// ```
140/// use std::path::PathBuf;
141///
142/// use rust_code_analysis::{operands_and_operators, CppParser, ParserTrait};
143///
144/// # fn main() {
145/// let source_code = "int a = 42;";
146///
147/// // The path to a dummy file used to contain the source code
148/// let path = PathBuf::from("foo.c");
149/// let source_as_vec = source_code.as_bytes().to_vec();
150///
151/// // The parser of the code, in this case a CPP parser
152/// let parser = CppParser::new(source_as_vec, &path, None);
153///
154/// // Returns the operands and operators of each space in a code.
155/// operands_and_operators(&parser, &path).unwrap();
156/// # }
157/// ```
158pub fn operands_and_operators<'a, T: ParserTrait>(parser: &'a T, path: &'a Path) -> Option<Ops> {
159    let code = parser.get_code();
160    let node = parser.get_root();
161    let mut cursor = node.cursor();
162    let mut stack = Vec::new();
163    let mut children = Vec::new();
164    let mut state_stack: Vec<State> = Vec::new();
165    let mut last_level = 0;
166
167    stack.push((node, 0));
168
169    while let Some((node, level)) = stack.pop() {
170        if level < last_level {
171            finalize::<T>(&mut state_stack, last_level - level);
172            last_level = level;
173        }
174
175        let kind = T::Getter::get_space_kind(&node);
176
177        let func_space = T::Checker::is_func(&node) || T::Checker::is_func_space(&node);
178
179        let new_level = if func_space {
180            let state = State {
181                ops: Ops::new::<T::Getter>(&node, code, kind),
182                halstead_maps: HalsteadMaps::new(),
183                primitive_types: HashSet::new(),
184            };
185            state_stack.push(state);
186            last_level = level + 1;
187            last_level
188        } else {
189            level
190        };
191
192        if let Some(state) = state_stack.last_mut() {
193            T::Halstead::compute(&node, code, &mut state.halstead_maps);
194            if T::Checker::is_primitive(node.kind_id()) {
195                let code = &code[node.start_byte()..node.end_byte()];
196                let primitive_string = String::from_utf8(code.to_vec())
197                    .unwrap_or_else(|_| String::from("primitive_type"));
198                state.primitive_types.insert(primitive_string);
199            }
200        }
201
202        cursor.reset(&node);
203        if cursor.goto_first_child() {
204            loop {
205                children.push((cursor.node(), new_level));
206                if !cursor.goto_next_sibling() {
207                    break;
208                }
209            }
210            for child in children.drain(..).rev() {
211                stack.push(child);
212            }
213        }
214    }
215
216    finalize::<T>(&mut state_stack, usize::MAX);
217
218    state_stack.pop().map(|mut state| {
219        state.ops.name = path.to_str().map(|name| name.to_string());
220        state.ops
221    })
222}
223
224/// Configuration options for retrieving
225/// all the operands and operators in a code.
226#[derive(Debug)]
227pub struct OpsCfg {
228    /// Path to the file containing the code.
229    pub path: PathBuf,
230}
231
232pub struct OpsCode {
233    _guard: (),
234}
235
236impl Callback for OpsCode {
237    type Res = std::io::Result<()>;
238    type Cfg = OpsCfg;
239
240    fn call<T: ParserTrait>(cfg: Self::Cfg, parser: &T) -> Self::Res {
241        if let Some(ops) = operands_and_operators(parser, &cfg.path) {
242            dump_ops(&ops)
243        } else {
244            Ok(())
245        }
246    }
247}
248
249#[cfg(test)]
250mod tests {
251    use std::path::PathBuf;
252
253    use crate::{LANG, get_ops};
254
255    #[inline(always)]
256    fn check_ops(
257        lang: LANG,
258        source: &str,
259        file: &str,
260        correct_operators: &mut [&str],
261        correct_operands: &mut [&str],
262    ) {
263        let path = PathBuf::from(file);
264        let mut trimmed_bytes = source.trim_end().trim_matches('\n').as_bytes().to_vec();
265        trimmed_bytes.push(b'\n');
266        let ops = get_ops(&lang, trimmed_bytes, &path, None).unwrap();
267
268        let mut operators_str: Vec<&str> = ops.operators.iter().map(AsRef::as_ref).collect();
269        let mut operands_str: Vec<&str> = ops.operands.iter().map(AsRef::as_ref).collect();
270
271        // Sorting out operators because they are returned in arbitrary order
272        operators_str.sort_unstable();
273        correct_operators.sort_unstable();
274
275        assert_eq!(&operators_str[..], correct_operators);
276
277        // Sorting out operands because they are returned in arbitrary order
278        operands_str.sort_unstable();
279        correct_operands.sort_unstable();
280
281        assert_eq!(&operands_str[..], correct_operands);
282    }
283
284    #[test]
285    fn python_ops() {
286        check_ops(
287            LANG::Python,
288            "if True:
289                 a = 1 + 2",
290            "foo.py",
291            &mut ["if", "=", "+"],
292            &mut ["True", "a", "1", "2"],
293        );
294    }
295
296    #[test]
297    fn python_function_ops() {
298        check_ops(
299            LANG::Python,
300            "def foo():
301                 def bar():
302                     def toto():
303                        a = 1 + 1
304                     b = 2 + a
305                 c = 3 + 3",
306            "foo.py",
307            &mut ["def", "=", "+"],
308            &mut ["foo", "bar", "toto", "a", "b", "c", "1", "2", "3"],
309        );
310    }
311
312    #[test]
313    fn cpp_ops() {
314        check_ops(
315            LANG::Cpp,
316            "int a, b, c;
317             float avg;
318             avg = (a + b + c) / 3;",
319            "foo.c",
320            &mut ["int", "float", "()", "=", "+", "/", ",", ";"],
321            &mut ["a", "b", "c", "avg", "3"],
322        );
323    }
324
325    #[test]
326    fn cpp_function_ops() {
327        check_ops(
328            LANG::Cpp,
329            "main()
330            {
331              int a, b, c, avg;
332              scanf(\"%d %d %d\", &a, &b, &c);
333              avg = (a + b + c) / 3;
334              printf(\"avg = %d\", avg);
335            }",
336            "foo.c",
337            &mut ["()", "{}", "int", "&", "=", "+", "/", ",", ";"],
338            &mut [
339                "main",
340                "a",
341                "b",
342                "c",
343                "avg",
344                "scanf",
345                "\"%d %d %d\"",
346                "3",
347                "printf",
348                "\"avg = %d\"",
349            ],
350        );
351    }
352
353    #[test]
354    fn rust_ops() {
355        check_ops(
356            LANG::Rust,
357            "let: usize a = 5; let b: f32 = 7.0; let c: i32 = 3;",
358            "foo.rs",
359            &mut ["let", "usize", "=", ";", "f32", "i32"],
360            &mut ["a", "b", "c", "5", "7.0", "3"],
361        );
362    }
363
364    #[test]
365    fn rust_function_ops() {
366        check_ops(
367            LANG::Rust,
368            "fn main() {
369              let a = 5; let b = 5; let c = 5;
370              let avg = (a + b + c) / 3;
371              println!(\"{}\", avg);
372            }",
373            "foo.rs",
374            &mut ["fn", "()", "{}", "let", "=", "+", "/", ";", "!", ","],
375            &mut ["main", "a", "b", "c", "avg", "5", "3", "println", "\"{}\""],
376        );
377    }
378
379    #[test]
380    fn javascript_ops() {
381        check_ops(
382            LANG::Javascript,
383            "var a, b, c, avg;
384             let x = 1;
385             a = 5; b = 5; c = 5;
386             avg = (a + b + c) / 3;
387             console.log(\"{}\", avg);",
388            "foo.js",
389            &mut ["()", "var", "let", "=", "+", "/", ",", ".", ";"],
390            &mut [
391                "a",
392                "b",
393                "c",
394                "avg",
395                "x",
396                "1",
397                "3",
398                "5",
399                "console.log",
400                "console",
401                "log",
402                "\"{}\"",
403            ],
404        );
405    }
406
407    #[test]
408    fn javascript_function_ops() {
409        check_ops(
410            LANG::Javascript,
411            "function main() {
412              var a, b, c, avg;
413              let x = 1;
414              a = 5; b = 5; c = 5;
415              avg = (a + b + c) / 3;
416              console.log(\"{}\", avg);
417            }",
418            "foo.js",
419            &mut [
420                "function", "()", "{}", "var", "let", "=", "+", "/", ",", ".", ";",
421            ],
422            &mut [
423                "main",
424                "a",
425                "b",
426                "c",
427                "avg",
428                "x",
429                "1",
430                "3",
431                "5",
432                "console.log",
433                "console",
434                "log",
435                "\"{}\"",
436            ],
437        );
438    }
439
440    #[test]
441    fn mozjs_ops() {
442        check_ops(
443            LANG::Mozjs,
444            "var a, b, c, avg;
445             let x = 1;
446             a = 5; b = 5; c = 5;
447             avg = (a + b + c) / 3;
448             console.log(\"{}\", avg);",
449            "foo.js",
450            &mut ["()", "var", "let", "=", "+", "/", ",", ".", ";"],
451            &mut [
452                "a",
453                "b",
454                "c",
455                "avg",
456                "x",
457                "1",
458                "3",
459                "5",
460                "console.log",
461                "console",
462                "log",
463                "\"{}\"",
464            ],
465        );
466    }
467
468    #[test]
469    fn mozjs_function_ops() {
470        check_ops(
471            LANG::Mozjs,
472            "function main() {
473              var a, b, c, avg;
474              let x = 1;
475              a = 5; b = 5; c = 5;
476              avg = (a + b + c) / 3;
477              console.log(\"{}\", avg);
478            }",
479            "foo.js",
480            &mut [
481                "function", "()", "{}", "var", "let", "=", "+", "/", ",", ".", ";",
482            ],
483            &mut [
484                "main",
485                "a",
486                "b",
487                "c",
488                "avg",
489                "x",
490                "1",
491                "3",
492                "5",
493                "console.log",
494                "console",
495                "log",
496                "\"{}\"",
497            ],
498        );
499    }
500
501    #[test]
502    fn typescript_ops() {
503        check_ops(
504            LANG::Typescript,
505            "var a, b, c, avg;
506             let age: number = 32;
507             let name: string = \"John\"; let isUpdated: boolean = true;
508             a = 5; b = 5; c = 5;
509             avg = (a + b + c) / 3;
510             console.log(\"{}\", avg);",
511            "foo.ts",
512            &mut [
513                "()", "var", "let", "string", "number", "boolean", ":", "=", "+", "/", ",", ".",
514                ";",
515            ],
516            &mut [
517                "a",
518                "b",
519                "c",
520                "avg",
521                "age",
522                "name",
523                "isUpdated",
524                "32",
525                "\"John\"",
526                "true",
527                "3",
528                "5",
529                "console.log",
530                "console",
531                "log",
532                "\"{}\"",
533            ],
534        );
535    }
536
537    #[test]
538    fn typescript_function_ops() {
539        check_ops(
540            LANG::Typescript,
541            "function main() {
542              var a, b, c, avg;
543              let age: number = 32;
544              let name: string = \"John\"; let isUpdated: boolean = true;
545              a = 5; b = 5; c = 5;
546              avg = (a + b + c) / 3;
547              console.log(\"{}\", avg);
548            }",
549            "foo.ts",
550            &mut [
551                "function", "()", "{}", "var", "let", "string", "number", "boolean", ":", "=", "+",
552                "/", ",", ".", ";",
553            ],
554            &mut [
555                "main",
556                "a",
557                "b",
558                "c",
559                "avg",
560                "age",
561                "name",
562                "isUpdated",
563                "32",
564                "\"John\"",
565                "true",
566                "3",
567                "5",
568                "console.log",
569                "console",
570                "log",
571                "\"{}\"",
572            ],
573        );
574    }
575
576    #[test]
577    fn tsx_ops() {
578        check_ops(
579            LANG::Tsx,
580            "var a, b, c, avg;
581             let age: number = 32;
582             let name: string = \"John\"; let isUpdated: boolean = true;
583             a = 5; b = 5; c = 5;
584             avg = (a + b + c) / 3;
585             console.log(\"{}\", avg);",
586            "foo.ts",
587            &mut [
588                "()", "var", "let", "string", "number", "boolean", ":", "=", "+", "/", ",", ".",
589                ";",
590            ],
591            &mut [
592                "a",
593                "b",
594                "c",
595                "avg",
596                "age",
597                "name",
598                "isUpdated",
599                "32",
600                "\"John\"",
601                "true",
602                "3",
603                "5",
604                "console.log",
605                "console",
606                "log",
607                "\"{}\"",
608            ],
609        );
610    }
611
612    #[test]
613    fn tsx_function_ops() {
614        check_ops(
615            LANG::Tsx,
616            "function main() {
617              var a, b, c, avg;
618              let age: number = 32;
619              let name: string = \"John\"; let isUpdated: boolean = true;
620              a = 5; b = 5; c = 5;
621              avg = (a + b + c) / 3;
622              console.log(\"{}\", avg);
623            }",
624            "foo.ts",
625            &mut [
626                "function", "()", "{}", "var", "let", "string", "number", "boolean", ":", "=", "+",
627                "/", ",", ".", ";",
628            ],
629            &mut [
630                "main",
631                "a",
632                "b",
633                "c",
634                "avg",
635                "age",
636                "name",
637                "isUpdated",
638                "32",
639                "\"John\"",
640                "true",
641                "3",
642                "5",
643                "console.log",
644                "console",
645                "log",
646                "\"{}\"",
647            ],
648        );
649    }
650
651    #[test]
652    fn java_ops() {
653        check_ops(
654            LANG::Java,
655            "public class Main {
656                public static void main(string args[]) {
657                      int a, b, c, avg;
658                      a = 5; b = 5; c = 5;
659                      avg = (a + b + c) / 3;
660                      MessageFormat.format(\"{0}\", avg);
661                    }
662                }",
663            "foo.java",
664            &mut ["{}", "void", "()", "[]", ",", ";", "int", "=", "+", "/"],
665            &mut [
666                "Main",
667                "main",
668                "args",
669                "a",
670                "b",
671                "c",
672                "avg",
673                "5",
674                "3",
675                "MessageFormat",
676                "format",
677                "\"{0}\"",
678            ],
679        );
680    }
681}