textalyzer/
lib.rs

1pub mod duplication;
2pub mod file_utils;
3pub mod frequency;
4pub mod line_length;
5pub mod output;
6pub mod types;
7
8extern crate colored;
9extern crate ignore;
10extern crate memmap2;
11extern crate pad;
12extern crate rayon;
13extern crate terminal_size;
14extern crate unicode_width;
15
16use colored::Colorize;
17use std::error::Error;
18use std::fs;
19use std::io::Write;
20use std::path::Path;
21
22use duplication::{find_duplicate_lines, find_multi_line_duplications};
23use file_utils::{find_all_files, load_files};
24use frequency::{format_freq_map, generate_frequency_map};
25use line_length::process_and_output_line_length;
26use output::output_duplications;
27use types::{Command, Config, FrequencyItem};
28use types::{DuplicationItem, DuplicationLocation};
29
30pub fn run<A: Write>(
31  config: Config,
32  mut output_stream: A,
33) -> Result<(), Box<dyn Error>> {
34  match config.command {
35    Command::Histogram { filepath, json } => {
36      let file_content = fs::read_to_string(filepath)?;
37      let freq_map = generate_frequency_map(&file_content);
38
39      if json {
40        // Convert HashMap to Vec<FrequencyItem> for stable JSON output
41        let mut freq_vec: Vec<FrequencyItem> = freq_map
42          .into_iter()
43          .map(|(word, count)| FrequencyItem { word, count })
44          .collect();
45        // Sort by count descending, then alphabetically for stability
46        freq_vec
47          .sort_by(|a, b| b.count.cmp(&a.count).then(a.word.cmp(&b.word)));
48        let json_output = serde_json::to_string_pretty(&freq_vec)?;
49        writeln!(&mut output_stream, "{json_output}")?;
50      } else {
51        let formatted = format_freq_map(freq_map);
52        // Use instead writeln! of println! to avoid "broken pipe" errors
53        writeln!(&mut output_stream, "{formatted}")?;
54      }
55      Ok(())
56    }
57    Command::Duplication {
58      paths,
59      min_lines,
60      files_only,
61      json,
62    } => {
63      // Collect all file entries from all specified paths
64      let mut all_files = Vec::new();
65      let mut scanned_dirs = 0;
66      let mut scanned_files = 0;
67
68      // Process each path argument
69      for path_str in paths {
70        let path = Path::new(&path_str);
71
72        if path.is_file() {
73          // Single file
74          all_files.push(path.to_path_buf());
75          scanned_files += 1;
76        } else if path.is_dir() {
77          // Directory traversal
78          let files = find_all_files(path)?;
79          writeln!(
80            &mut output_stream,
81            "{}",
82            format!(
83              "🔎 Scanning {} files in directory: {}",
84              files.len(),
85              path.display()
86            )
87            .bold()
88          )?;
89
90          all_files.extend(files);
91          scanned_dirs += 1;
92        } else {
93          return Err(
94            format!("Path does not exist: {}", path.display()).into(),
95          );
96        }
97      }
98
99      if scanned_dirs == 0 && scanned_files > 0 {
100        writeln!(
101          &mut output_stream,
102          "{}",
103          format!("🔎 Scanning {} file(s)", all_files.len()).bold()
104        )?;
105      }
106
107      if all_files.is_empty() {
108        return Err("No valid files found in the specified paths".into());
109      }
110
111      // Load all collected files
112      let file_entries = load_files(all_files)?;
113
114      // Choose the appropriate function based on the min_lines value
115      let duplications = if min_lines <= 1 {
116        // For min_lines of 1, use the single-line detection function
117        find_duplicate_lines(file_entries)
118      } else {
119        // For min_lines > 1, use the multi-line detection with filtering
120        let mut results = find_multi_line_duplications(file_entries);
121
122        // Only include those with at least min_lines non-empty lines
123        results.retain(|(content, _)| {
124          let non_empty_lines = content
125            .split('\n')
126            .filter(|line| !line.trim().is_empty())
127            .count();
128          non_empty_lines >= min_lines
129        });
130
131        results
132      };
133
134      if json {
135        let semantic: Vec<DuplicationItem> = duplications
136          .into_iter()
137          .map(|(content, locs)| DuplicationItem {
138            content,
139            locations: locs
140              .into_iter()
141              .map(|(path, line)| DuplicationLocation { path, line })
142              .collect(),
143          })
144          .collect();
145        let json_output = serde_json::to_string_pretty(&semantic)?;
146        writeln!(&mut output_stream, "{json_output}")?;
147        Ok(())
148      } else {
149        output_duplications(duplications, output_stream, files_only)
150      }
151    }
152    Command::LineLength { paths, json } => {
153      // Collect all file entries from all specified paths
154      let mut all_files = Vec::new();
155      let mut scanned_dirs = 0;
156      let mut scanned_files = 0;
157
158      // Process each path argument
159      for path_str in paths {
160        let path = Path::new(&path_str);
161
162        if path.is_file() {
163          // Single file
164          all_files.push(path.to_path_buf());
165          scanned_files += 1;
166        } else if path.is_dir() {
167          // Directory traversal
168          let files = find_all_files(path)?;
169          writeln!(
170            &mut output_stream,
171            "{}",
172            format!(
173              "🔎 Scanning {} files in directory: {}",
174              files.len(),
175              path.display()
176            )
177            .bold()
178          )?;
179
180          all_files.extend(files);
181          scanned_dirs += 1;
182        } else {
183          return Err(
184            format!("Path does not exist: {}", path.display()).into(),
185          );
186        }
187      }
188
189      if scanned_dirs == 0 && scanned_files > 0 {
190        writeln!(
191          &mut output_stream,
192          "{}",
193          format!("🔎 Scanning {} file(s)", all_files.len()).bold()
194        )?;
195      }
196
197      if all_files.is_empty() {
198        return Err("No valid files found in the specified paths".into());
199      }
200
201      // Load all collected files
202      let file_entries = load_files(all_files)?;
203
204      // Process and output the line length histogram
205      process_and_output_line_length(file_entries, output_stream, json)
206    }
207  }
208}