similarity_elixir/parallel.rs
1#![allow(clippy::uninlined_format_args)]
2
3use crate::elixir_parser::ElixirParser;
4use rayon::prelude::*;
5use similarity_core::{
6 cli_parallel::{FileData, SimilarityResult},
7 language_parser::{GenericFunctionDef, LanguageParser},
8 tsed::{calculate_tsed, TSEDOptions},
9};
10use std::fs;
11use std::path::PathBuf;
12
13/// Elixir file with its content and extracted functions
14#[allow(dead_code)]
15pub type ElixirFileData = FileData<GenericFunctionDef>;
16
17/// Load and parse Elixir files in parallel
18#[allow(dead_code)]
19pub fn load_files_parallel(files: &[PathBuf]) -> Vec<ElixirFileData> {
20 files
21 .par_iter()
22 .filter_map(|file| {
23 match fs::read_to_string(file) {
24 Ok(content) => {
25 let filename = file.to_string_lossy();
26 // Create Elixir parser
27 match ElixirParser::new() {
28 Ok(mut parser) => {
29 // Extract functions
30 match parser.extract_functions(&content, &filename) {
31 Ok(functions) => {
32 Some(FileData { path: file.clone(), content, functions })
33 }
34 Err(e) => {
35 eprintln!("Error parsing {}: {}", file.display(), e);
36 None
37 }
38 }
39 }
40 Err(e) => {
41 eprintln!("Error creating parser for {}: {}", file.display(), e);
42 None
43 }
44 }
45 }
46 Err(e) => {
47 eprintln!("Error reading {}: {}", file.display(), e);
48 None
49 }
50 }
51 })
52 .collect()
53}
54
55/// Check for duplicates within Elixir files in parallel
56pub fn check_within_file_duplicates_parallel(
57 files: &[PathBuf],
58 threshold: f64,
59 options: &TSEDOptions,
60) -> Vec<(PathBuf, Vec<SimilarityResult<GenericFunctionDef>>)> {
61 files
62 .par_iter()
63 .filter_map(|file| match fs::read_to_string(file) {
64 Ok(code) => {
65 let file_str = file.to_string_lossy();
66
67 // Create Elixir parser
68 match ElixirParser::new() {
69 Ok(mut parser) => {
70 // Extract functions
71 match parser.extract_functions(&code, &file_str) {
72 Ok(functions) => {
73 let mut similar_pairs = Vec::new();
74
75 // Compare all pairs within the file
76 for i in 0..functions.len() {
77 for j in (i + 1)..functions.len() {
78 let func1 = &functions[i];
79 let func2 = &functions[j];
80
81 // Skip if functions don't meet minimum requirements
82 if func1.end_line - func1.start_line + 1 < options.min_lines
83 || func2.end_line - func2.start_line + 1
84 < options.min_lines
85 {
86 continue;
87 }
88
89 // Extract function bodies
90 let lines: Vec<&str> = code.lines().collect();
91 let body1 = extract_function_body(&lines, func1);
92 let body2 = extract_function_body(&lines, func2);
93
94 // Calculate similarity using Elixir parser
95 let similarity = match (
96 parser.parse(&body1, &format!("{}:func1", file_str)),
97 parser.parse(&body2, &format!("{}:func2", file_str)),
98 ) {
99 (Ok(tree1), Ok(tree2)) => {
100 // Use calculate_tsed to apply size_penalty and other options
101 calculate_tsed(&tree1, &tree2, options)
102 }
103 _ => 0.0,
104 };
105
106 if similarity >= threshold {
107 similar_pairs.push(SimilarityResult::new(
108 func1.clone(),
109 func2.clone(),
110 similarity,
111 ));
112 }
113 }
114 }
115
116 if similar_pairs.is_empty() {
117 None
118 } else {
119 Some((file.clone(), similar_pairs))
120 }
121 }
122 Err(_) => None,
123 }
124 }
125 Err(_) => None,
126 }
127 }
128 Err(_) => None,
129 })
130 .collect()
131}
132
133/// Extract function body from lines
134fn extract_function_body(lines: &[&str], func: &GenericFunctionDef) -> String {
135 let start_idx = (func.body_start_line.saturating_sub(1)) as usize;
136 let end_idx = std::cmp::min(func.body_end_line as usize, lines.len());
137
138 if start_idx >= lines.len() {
139 return String::new();
140 }
141
142 lines[start_idx..end_idx].join("\n")
143}