similarity_elixir/
parallel.rs

1#![allow(clippy::uninlined_format_args)]
2
3use crate::elixir_parser::ElixirParser;
4use rayon::prelude::*;
5use similarity_core::{
6    cli_parallel::{FileData, SimilarityResult},
7    language_parser::{GenericFunctionDef, LanguageParser},
8    tsed::{calculate_tsed, TSEDOptions},
9};
10use std::fs;
11use std::path::PathBuf;
12
/// Elixir file with its content and extracted functions.
///
/// Alias of the shared [`FileData`] container specialised to
/// [`GenericFunctionDef`]; values are built in this module from a file's
/// `path`, its full `content`, and the `functions` extracted from it.
// NOTE(review): the `dead_code` allowance presumably exists because not every
// build target references this alias — confirm before removing it.
#[allow(dead_code)]
pub type ElixirFileData = FileData<GenericFunctionDef>;
16
17/// Load and parse Elixir files in parallel
18#[allow(dead_code)]
19pub fn load_files_parallel(files: &[PathBuf]) -> Vec<ElixirFileData> {
20    files
21        .par_iter()
22        .filter_map(|file| {
23            match fs::read_to_string(file) {
24                Ok(content) => {
25                    let filename = file.to_string_lossy();
26                    // Create Elixir parser
27                    match ElixirParser::new() {
28                        Ok(mut parser) => {
29                            // Extract functions
30                            match parser.extract_functions(&content, &filename) {
31                                Ok(functions) => {
32                                    Some(FileData { path: file.clone(), content, functions })
33                                }
34                                Err(e) => {
35                                    eprintln!("Error parsing {}: {}", file.display(), e);
36                                    None
37                                }
38                            }
39                        }
40                        Err(e) => {
41                            eprintln!("Error creating parser for {}: {}", file.display(), e);
42                            None
43                        }
44                    }
45                }
46                Err(e) => {
47                    eprintln!("Error reading {}: {}", file.display(), e);
48                    None
49                }
50            }
51        })
52        .collect()
53}
54
55/// Check for duplicates within Elixir files in parallel
56pub fn check_within_file_duplicates_parallel(
57    files: &[PathBuf],
58    threshold: f64,
59    options: &TSEDOptions,
60) -> Vec<(PathBuf, Vec<SimilarityResult<GenericFunctionDef>>)> {
61    files
62        .par_iter()
63        .filter_map(|file| match fs::read_to_string(file) {
64            Ok(code) => {
65                let file_str = file.to_string_lossy();
66
67                // Create Elixir parser
68                match ElixirParser::new() {
69                    Ok(mut parser) => {
70                        // Extract functions
71                        match parser.extract_functions(&code, &file_str) {
72                            Ok(functions) => {
73                                let mut similar_pairs = Vec::new();
74
75                                // Compare all pairs within the file
76                                for i in 0..functions.len() {
77                                    for j in (i + 1)..functions.len() {
78                                        let func1 = &functions[i];
79                                        let func2 = &functions[j];
80
81                                        // Skip if functions don't meet minimum requirements
82                                        if func1.end_line - func1.start_line + 1 < options.min_lines
83                                            || func2.end_line - func2.start_line + 1
84                                                < options.min_lines
85                                        {
86                                            continue;
87                                        }
88
89                                        // Extract function bodies
90                                        let lines: Vec<&str> = code.lines().collect();
91                                        let body1 = extract_function_body(&lines, func1);
92                                        let body2 = extract_function_body(&lines, func2);
93
94                                        // Calculate similarity using Elixir parser
95                                        let similarity = match (
96                                            parser.parse(&body1, &format!("{}:func1", file_str)),
97                                            parser.parse(&body2, &format!("{}:func2", file_str)),
98                                        ) {
99                                            (Ok(tree1), Ok(tree2)) => {
100                                                // Use calculate_tsed to apply size_penalty and other options
101                                                calculate_tsed(&tree1, &tree2, options)
102                                            }
103                                            _ => 0.0,
104                                        };
105
106                                        if similarity >= threshold {
107                                            similar_pairs.push(SimilarityResult::new(
108                                                func1.clone(),
109                                                func2.clone(),
110                                                similarity,
111                                            ));
112                                        }
113                                    }
114                                }
115
116                                if similar_pairs.is_empty() {
117                                    None
118                                } else {
119                                    Some((file.clone(), similar_pairs))
120                                }
121                            }
122                            Err(_) => None,
123                        }
124                    }
125                    Err(_) => None,
126                }
127            }
128            Err(_) => None,
129        })
130        .collect()
131}
132
133/// Extract function body from lines
134fn extract_function_body(lines: &[&str], func: &GenericFunctionDef) -> String {
135    let start_idx = (func.body_start_line.saturating_sub(1)) as usize;
136    let end_idx = std::cmp::min(func.body_end_line as usize, lines.len());
137
138    if start_idx >= lines.len() {
139        return String::new();
140    }
141
142    lines[start_idx..end_idx].join("\n")
143}