rust_code_analysis/
ops.rs

1use std::collections::HashSet;
2use std::path::{Path, PathBuf};
3
4use serde::Serialize;
5
6use crate::checker::Checker;
7use crate::getter::Getter;
8use crate::node::Node;
9use crate::spaces::SpaceKind;
10
11use crate::halstead::{Halstead, HalsteadMaps};
12
13use crate::dump_ops::*;
14use crate::traits::*;
15
16/// All operands and operators of a space.
17#[derive(Debug, Clone, Serialize)]
18pub struct Ops {
19    /// The name of a function space.
20    ///
21    /// If `None`, an error is occured in parsing
22    /// the name of a function space.
23    pub name: Option<String>,
24    /// The first line of a function space.
25    pub start_line: usize,
26    /// The last line of a function space.
27    pub end_line: usize,
28    /// The space kind.
29    pub kind: SpaceKind,
30    /// All subspaces contained in a function space.
31    pub spaces: Vec<Ops>,
32    /// All operands of a space.
33    pub operands: Vec<String>,
34    /// All operators of a space.
35    pub operators: Vec<String>,
36}
37
38impl Ops {
39    fn new<T: Getter>(node: &Node, code: &[u8], kind: SpaceKind) -> Self {
40        let (start_position, end_position) = match kind {
41            SpaceKind::Unit => {
42                if node.object().child_count() == 0 {
43                    (0, 0)
44                } else {
45                    (
46                        node.object().start_position().row + 1,
47                        node.object().end_position().row,
48                    )
49                }
50            }
51            _ => (
52                node.object().start_position().row + 1,
53                node.object().end_position().row + 1,
54            ),
55        };
56        Self {
57            name: T::get_func_space_name(node, code).map(|name| name.to_string()),
58            spaces: Vec::new(),
59            kind,
60            start_line: start_position,
61            end_line: end_position,
62            operators: Vec::new(),
63            operands: Vec::new(),
64        }
65    }
66
67    pub(crate) fn merge_ops(&mut self, other: &Ops) {
68        self.operands.extend_from_slice(&other.operands);
69        self.operators.extend_from_slice(&other.operators);
70    }
71}
72
73#[derive(Debug, Clone)]
74struct State<'a> {
75    ops: Ops,
76    halstead_maps: HalsteadMaps<'a>,
77    primitive_types: HashSet<String>,
78}
79
80fn compute_operators_and_operands<T: ParserTrait>(state: &mut State) {
81    state.ops.operators = state
82        .halstead_maps
83        .operators
84        .keys()
85        .filter(|k| !T::Checker::is_primitive(**k))
86        .map(|k| T::Getter::get_operator_id_as_str(*k).to_owned())
87        .collect();
88
89    // Add primitive types to operators
90    let v: Vec<_> = state.primitive_types.iter().cloned().collect();
91    state.ops.operators.extend_from_slice(&v);
92    println!("{:?}", state.ops.operators);
93    println!("{:?}", state.halstead_maps.operators);
94
95    state.ops.operands = state
96        .halstead_maps
97        .operands
98        .keys()
99        .map(|k| String::from_utf8(k.to_vec()).unwrap_or_else(|_| String::from("wrong_operands")))
100        .collect();
101}
102
103fn finalize<T: ParserTrait>(state_stack: &mut Vec<State>, diff_level: usize) {
104    if state_stack.is_empty() {
105        return;
106    }
107
108    // If there is only the unit space
109    if state_stack.len() == 1 {
110        let last_state = state_stack.last_mut().unwrap();
111        // Compute last_state operators and operands
112        compute_operators_and_operands::<T>(last_state);
113    }
114
115    for _ in 0..diff_level {
116        if state_stack.len() == 1 {
117            break;
118        } else {
119            let mut state = state_stack.pop().unwrap();
120            let last_state = state_stack.last_mut().unwrap();
121
122            // Compute state operators and operands
123            compute_operators_and_operands::<T>(&mut state);
124
125            // Compute last_state operators and operands
126            compute_operators_and_operands::<T>(last_state);
127
128            // Merge Halstead maps
129            last_state.halstead_maps.merge(&state.halstead_maps);
130
131            // Merge operands and operators between spaces
132            last_state.ops.merge_ops(&state.ops);
133            last_state.ops.spaces.push(state.ops);
134        }
135    }
136}
137
138/// Retrieves all the operators and operands of a code.
139///
140/// If `None`, it was not possible to retrieve the operators and operands
141/// of a code.
142///
143/// # Examples
144///
145/// ```
146/// use std::path::PathBuf;
147///
148/// use rust_code_analysis::{operands_and_operators, CppParser, ParserTrait};
149///
150/// # fn main() {
151/// let source_code = "int a = 42;";
152///
153/// // The path to a dummy file used to contain the source code
154/// let path = PathBuf::from("foo.c");
155/// let source_as_vec = source_code.as_bytes().to_vec();
156///
157/// // The parser of the code, in this case a CPP parser
158/// let parser = CppParser::new(source_as_vec, &path, None);
159///
160/// // Returns the operands and operators of each space in a code.
161/// operands_and_operators(&parser, &path).unwrap();
162/// # }
163/// ```
164pub fn operands_and_operators<'a, T: ParserTrait>(parser: &'a T, path: &'a Path) -> Option<Ops> {
165    let code = parser.get_code();
166    let node = parser.get_root();
167    let mut cursor = node.object().walk();
168    let mut stack = Vec::new();
169    let mut children = Vec::new();
170    let mut state_stack: Vec<State> = Vec::new();
171    let mut last_level = 0;
172
173    stack.push((node, 0));
174
175    while let Some((node, level)) = stack.pop() {
176        if level < last_level {
177            finalize::<T>(&mut state_stack, last_level - level);
178            last_level = level;
179        }
180
181        let kind = T::Getter::get_space_kind(&node);
182
183        let func_space = T::Checker::is_func(&node) || T::Checker::is_func_space(&node);
184
185        let new_level = if func_space {
186            let state = State {
187                ops: Ops::new::<T::Getter>(&node, code, kind),
188                halstead_maps: HalsteadMaps::new(),
189                primitive_types: HashSet::new(),
190            };
191            state_stack.push(state);
192            last_level = level + 1;
193            last_level
194        } else {
195            level
196        };
197
198        if let Some(state) = state_stack.last_mut() {
199            T::Halstead::compute(&node, code, &mut state.halstead_maps);
200            if T::Checker::is_primitive(node.object().kind_id()) {
201                let code = &code[node.object().start_byte()..node.object().end_byte()];
202                let primitive_string = String::from_utf8(code.to_vec())
203                    .unwrap_or_else(|_| String::from("primitive_type"));
204                state.primitive_types.insert(primitive_string);
205            }
206        }
207
208        cursor.reset(node.object());
209        if cursor.goto_first_child() {
210            loop {
211                children.push((Node::new(cursor.node()), new_level));
212                if !cursor.goto_next_sibling() {
213                    break;
214                }
215            }
216            for child in children.drain(..).rev() {
217                stack.push(child);
218            }
219        }
220    }
221
222    finalize::<T>(&mut state_stack, std::usize::MAX);
223
224    state_stack.pop().map(|mut state| {
225        state.ops.name = path.to_str().map(|name| name.to_string());
226        state.ops
227    })
228}
229
/// Configuration options for retrieving
/// all the operands and operators in a code.
#[derive(Debug)]
pub struct OpsCfg {
    /// Path to the file containing the code.
    pub path: PathBuf,
}
236
/// Marker type whose `Callback` implementation retrieves and dumps
/// the operands and operators of a code.
///
/// The private `_guard` field keeps the type from being built with a struct
/// literal outside of this module.
#[derive(Debug)]
pub struct OpsCode {
    _guard: (),
}
240
241impl Callback for OpsCode {
242    type Res = std::io::Result<()>;
243    type Cfg = OpsCfg;
244
245    fn call<T: ParserTrait>(cfg: Self::Cfg, parser: &T) -> Self::Res {
246        if let Some(ops) = operands_and_operators(parser, &cfg.path) {
247            dump_ops(&ops)
248        } else {
249            Ok(())
250        }
251    }
252}
253
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use crate::{get_ops, LANG};

    /// Computes the operators and operands of `source`, parsed as `lang`,
    /// and compares them with the expected ones.
    ///
    /// Trailing whitespace of `source` is trimmed and a single `\n` is
    /// appended before parsing. Both the computed and the expected lists are
    /// sorted before being compared, so the order of the expected values
    /// does not matter (the slices are sorted in place).
    fn check_ops(
        lang: LANG,
        source: &str,
        file: &str,
        correct_operators: &mut [&str],
        correct_operands: &mut [&str],
    ) {
        let path = PathBuf::from(file);
        let mut trimmed_bytes = source.trim_end().trim_matches('\n').as_bytes().to_vec();
        trimmed_bytes.push(b'\n');
        let ops = get_ops(&lang, trimmed_bytes, &path, None).unwrap();

        let mut operators_str: Vec<&str> = ops.operators.iter().map(AsRef::as_ref).collect();
        let mut operands_str: Vec<&str> = ops.operands.iter().map(AsRef::as_ref).collect();

        // Sorting out operators because they are returned in arbitrary order
        operators_str.sort_unstable();
        correct_operators.sort_unstable();

        assert_eq!(&operators_str[..], correct_operators);

        // Sorting out operands because they are returned in arbitrary order
        operands_str.sort_unstable();
        correct_operands.sort_unstable();

        assert_eq!(&operands_str[..], correct_operands);
    }

    /// Top-level statements check shared by the JavaScript-like grammars.
    fn check_js_ops(lang: LANG) {
        check_ops(
            lang,
            "var a, b, c, avg;
             let x = 1;
             a = 5; b = 5; c = 5;
             avg = (a + b + c) / 3;
             console.log(\"{}\", avg);",
            "foo.js",
            &mut ["()", "var", "let", "=", "+", "/", ",", ".", ";"],
            &mut [
                "a",
                "b",
                "c",
                "avg",
                "x",
                "1",
                "3",
                "5",
                "console.log",
                "console",
                "log",
                "\"{}\"",
            ],
        );
    }

    /// Function-body check shared by the JavaScript-like grammars.
    fn check_js_function_ops(lang: LANG) {
        check_ops(
            lang,
            "function main() {
              var a, b, c, avg;
              let x = 1;
              a = 5; b = 5; c = 5;
              avg = (a + b + c) / 3;
              console.log(\"{}\", avg);
            }",
            "foo.js",
            &mut [
                "function", "()", "{}", "var", "let", "=", "+", "/", ",", ".", ";",
            ],
            &mut [
                "main",
                "a",
                "b",
                "c",
                "avg",
                "x",
                "1",
                "3",
                "5",
                "console.log",
                "console",
                "log",
                "\"{}\"",
            ],
        );
    }

    /// Top-level statements check shared by the TypeScript-like grammars.
    fn check_ts_ops(lang: LANG) {
        check_ops(
            lang,
            "var a, b, c, avg;
             let age: number = 32;
             let name: string = \"John\"; let isUpdated: boolean = true;
             a = 5; b = 5; c = 5;
             avg = (a + b + c) / 3;
             console.log(\"{}\", avg);",
            "foo.ts",
            &mut [
                "()", "var", "let", "string", "number", "boolean", ":", "=", "+", "/", ",", ".",
                ";",
            ],
            &mut [
                "a",
                "b",
                "c",
                "avg",
                "age",
                "name",
                "isUpdated",
                "32",
                "\"John\"",
                "true",
                "3",
                "5",
                "console.log",
                "console",
                "log",
                "\"{}\"",
            ],
        );
    }

    /// Function-body check shared by the TypeScript-like grammars.
    fn check_ts_function_ops(lang: LANG) {
        check_ops(
            lang,
            "function main() {
              var a, b, c, avg;
              let age: number = 32;
              let name: string = \"John\"; let isUpdated: boolean = true;
              a = 5; b = 5; c = 5;
              avg = (a + b + c) / 3;
              console.log(\"{}\", avg);
            }",
            "foo.ts",
            &mut [
                "function", "()", "{}", "var", "let", "string", "number", "boolean", ":", "=", "+",
                "/", ",", ".", ";",
            ],
            &mut [
                "main",
                "a",
                "b",
                "c",
                "avg",
                "age",
                "name",
                "isUpdated",
                "32",
                "\"John\"",
                "true",
                "3",
                "5",
                "console.log",
                "console",
                "log",
                "\"{}\"",
            ],
        );
    }

    #[test]
    fn python_ops() {
        check_ops(
            LANG::Python,
            "if True:
                 a = 1 + 2",
            "foo.py",
            &mut ["if", "=", "+"],
            &mut ["True", "a", "1", "2"],
        );
    }

    #[test]
    fn python_function_ops() {
        check_ops(
            LANG::Python,
            "def foo():
                 def bar():
                     def toto():
                        a = 1 + 1
                     b = 2 + a
                 c = 3 + 3",
            "foo.py",
            &mut ["def", "=", "+"],
            &mut ["foo", "bar", "toto", "a", "b", "c", "1", "2", "3"],
        );
    }

    #[test]
    fn cpp_ops() {
        check_ops(
            LANG::Cpp,
            "int a, b, c;
             float avg;
             avg = (a + b + c) / 3;",
            "foo.c",
            &mut ["int", "float", "()", "=", "+", "/", ",", ";"],
            &mut ["a", "b", "c", "avg", "3"],
        );
    }

    #[test]
    fn cpp_function_ops() {
        check_ops(
            LANG::Cpp,
            "main()
            {
              int a, b, c, avg;
              scanf(\"%d %d %d\", &a, &b, &c);
              avg = (a + b + c) / 3;
              printf(\"avg = %d\", avg);
            }",
            "foo.c",
            &mut ["()", "{}", "int", "&", "=", "+", "/", ",", ";"],
            &mut [
                "main",
                "a",
                "b",
                "c",
                "avg",
                "scanf",
                "\"%d %d %d\"",
                "3",
                "printf",
                "\"avg = %d\"",
            ],
        );
    }

    #[test]
    fn rust_ops() {
        check_ops(
            LANG::Rust,
            // NOTE(review): `let: usize a = 5;` looks like a typo for
            // `let a: usize = 5;`. It is kept as is because the expected
            // values below were written against this exact input.
            "let: usize a = 5; let b: f32 = 7.0; let c: i32 = 3;",
            "foo.rs",
            &mut ["let", "usize", "=", ";", "f32", "i32"],
            &mut ["a", "b", "c", "5", "7.0", "3"],
        );
    }

    #[test]
    fn rust_function_ops() {
        check_ops(
            LANG::Rust,
            "fn main() {
              let a = 5; let b = 5; let c = 5;
              let avg = (a + b + c) / 3;
              println!(\"{}\", avg);
            }",
            "foo.rs",
            // FIXME tree-sitter-rust does not parse the comma inside the println! macro
            &mut ["fn", "()", "{}", "let", "=", "+", "/", ";", "!"],
            &mut ["main", "a", "b", "c", "avg", "5", "3", "println", "\"{}\""],
        );
    }

    #[test]
    fn javascript_ops() {
        check_js_ops(LANG::Javascript);
    }

    #[test]
    fn javascript_function_ops() {
        check_js_function_ops(LANG::Javascript);
    }

    #[test]
    fn mozjs_ops() {
        check_js_ops(LANG::Mozjs);
    }

    #[test]
    fn mozjs_function_ops() {
        check_js_function_ops(LANG::Mozjs);
    }

    #[test]
    fn typescript_ops() {
        check_ts_ops(LANG::Typescript);
    }

    #[test]
    fn typescript_function_ops() {
        check_ts_function_ops(LANG::Typescript);
    }

    #[test]
    fn tsx_ops() {
        check_ts_ops(LANG::Tsx);
    }

    #[test]
    fn tsx_function_ops() {
        check_ts_function_ops(LANG::Tsx);
    }
}