// lawkit_python/subcommands/benf.rs
use crate::colors;
2use clap::ArgMatches;
3use lawkit_core::{
4 common::{
5 filtering::{apply_number_filter, NumberFilter, RiskThreshold},
6 input::{parse_input_auto, parse_text_input},
7 memory::{streaming_benford_analysis, MemoryConfig},
8 risk::RiskLevel,
9 streaming_io::OptimizedFileReader,
10 },
11 error::{BenfError, Result},
12 laws::benford::BenfordResult,
13};
14use std::str::FromStr;
15
16pub fn run(matches: &ArgMatches) -> Result<()> {
17 if matches.get_flag("verbose") {
19 eprintln!(
20 "Debug: input argument = {:?}",
21 matches.get_one::<String>("input")
22 );
23 }
24
25 if let Some(input) = matches.get_one::<String>("input") {
26 match parse_input_auto(input) {
28 Ok(numbers) => {
29 if numbers.is_empty() {
30 eprintln!("Error: No valid numbers found in input");
31 std::process::exit(1);
32 }
33
34 let result =
36 match analyze_numbers_with_options(matches, input.to_string(), &numbers) {
37 Ok(result) => result,
38 Err(e) => {
39 eprintln!("Analysis error: {e}");
40 std::process::exit(1);
41 }
42 };
43
44 output_results(matches, &result);
46 std::process::exit(result.risk_level.exit_code());
47 }
48 Err(e) => {
49 eprintln!("Error processing input '{input}': {e}");
50 std::process::exit(1);
51 }
52 }
53 } else {
54 if matches.get_flag("verbose") {
56 eprintln!("Debug: Reading from stdin, using automatic optimization");
57 }
58
59 let mut reader = OptimizedFileReader::from_stdin();
61
62 if matches.get_flag("verbose") {
63 eprintln!(
64 "Debug: Using automatic optimization (streaming + incremental + memory efficiency)"
65 );
66 }
67
68 let numbers = match reader.read_lines_streaming(|line| {
70 if matches.get_flag("verbose") {
71 eprintln!("Debug: Processing line: '{line}'");
72 }
73 parse_text_input(&line).map(Some).or(Ok(None))
74 }) {
75 Ok(nested_numbers) => {
76 let flattened: Vec<f64> = nested_numbers.into_iter().flatten().collect();
77 if matches.get_flag("verbose") {
78 eprintln!("Debug: Collected {} numbers from stream", flattened.len());
79 }
80 flattened
81 }
82 Err(e) => {
83 eprintln!("Analysis error: {e}");
84 std::process::exit(1);
85 }
86 };
87
88 let filtered_numbers = if let Some(min_value_str) = matches.get_one::<String>("min-value") {
90 let min_val = min_value_str
91 .parse::<f64>()
92 .map_err(|_| {
93 eprintln!("Error: Invalid minimum value");
94 std::process::exit(2);
95 })
96 .unwrap();
97
98 let original_len = numbers.len();
99 let filtered: Vec<f64> = numbers.into_iter().filter(|&x| x >= min_val).collect();
100
101 if matches.get_flag("verbose") {
102 eprintln!(
103 "Debug: Min-value filter applied: {} → {} numbers (>= {})",
104 original_len,
105 filtered.len(),
106 min_val
107 );
108 eprintln!(
109 "Debug: Filter removed {} values ({:.1}%)",
110 original_len - filtered.len(),
111 100.0 * (original_len - filtered.len()) as f64 / original_len as f64
112 );
113 }
114 filtered
115 } else {
116 numbers
117 };
118
119 let memory_config = MemoryConfig::default();
121
122 let chunk_result =
124 match streaming_benford_analysis(filtered_numbers.into_iter(), &memory_config) {
125 Ok(result) => {
126 if matches.get_flag("verbose") {
127 eprintln!(
128 "Debug: Streaming analysis successful - {} items processed",
129 result.total_items
130 );
131 }
132 result
133 }
134 Err(e) => {
135 eprintln!("Streaming analysis error: {e}");
136 std::process::exit(1);
137 }
138 };
139
140 if chunk_result.total_items == 0 {
141 if matches.get_flag("verbose") {
142 eprintln!(
143 "Debug: Total items in chunk_result: {}",
144 chunk_result.total_items
145 );
146 }
147 eprintln!("Error: No valid numbers found in input");
148 std::process::exit(1);
149 }
150
151 let benford_result =
153 convert_incremental_to_result(&chunk_result.result, "stdin".to_string(), matches);
154
155 if matches.get_flag("verbose") {
157 eprintln!(
158 "Debug: Processed {} numbers in {} chunks",
159 chunk_result.total_items, chunk_result.chunks_processed
160 );
161 eprintln!("Debug: Memory used: {:.2} MB", chunk_result.memory_used_mb);
162 eprintln!(
163 "Debug: Processing time: {} ms",
164 chunk_result.processing_time_ms
165 );
166 }
167
168 output_results(matches, &benford_result);
170 std::process::exit(benford_result.risk_level.exit_code());
171 }
172}
173
174fn output_results(matches: &clap::ArgMatches, result: &BenfordResult) {
175 let format = matches.get_one::<String>("format").unwrap();
176 let quiet = matches.get_flag("quiet");
177 let verbose = matches.get_flag("verbose");
178
179 match format.as_str() {
180 "text" => print_text_output(result, quiet, verbose),
181 "json" => print_json_output(result),
182 "csv" => print_csv_output(result),
183 "yaml" => print_yaml_output(result),
184 "toml" => print_toml_output(result),
185 "xml" => print_xml_output(result),
186 _ => {
187 eprintln!("Error: Unsupported output format: {format}");
188 std::process::exit(2);
189 }
190 }
191}
192
193fn print_text_output(result: &BenfordResult, quiet: bool, verbose: bool) {
194 if quiet {
195 for (i, &observed) in result.digit_distribution.iter().enumerate() {
196 println!("{}: {:.1}%", i + 1, observed);
197 }
198 return;
199 }
200
201 println!("Benford Law Analysis Results");
202 println!();
203 println!("Dataset: {}", result.dataset_name);
204 println!("Numbers analyzed: {}", result.numbers_analyzed);
205 match result.risk_level {
206 RiskLevel::Critical => println!("{}", colors::level_critical("Dataset analysis")),
207 RiskLevel::High => println!("{}", colors::level_high("Dataset analysis")),
208 RiskLevel::Medium => println!("{}", colors::level_medium("Dataset analysis")),
209 RiskLevel::Low => println!("{}", colors::level_low("Dataset analysis")),
210 }
211
212 println!();
213 println!("First Digit Distribution:");
214 println!("{}", format_distribution_bars(result));
215
216 if verbose {
217 println!();
218 println!("First Digit Distribution:");
219 for (i, &observed) in result.digit_distribution.iter().enumerate() {
220 let digit = i + 1;
221 let expected = result.expected_distribution[i];
222 let deviation = observed - expected;
223
224 println!(
225 "{digit}: {observed:.1}% (expected: {expected:.1}%, deviation: {deviation:+.1}%)"
226 );
227 }
228
229 println!();
230 println!("Statistical Tests:");
231 println!(
232 "Chi-square: {:.2} (p-value: {:.6})",
233 result.chi_square, result.p_value
234 );
235 }
236}
237
238fn print_json_output(result: &BenfordResult) {
239 use serde_json::json;
240
241 let output = json!({
242 "dataset": result.dataset_name,
243 "numbers_analyzed": result.numbers_analyzed,
244 "risk_level": format!("{:?}", result.risk_level),
245 "chi_square": result.chi_square,
246 "p_value": result.p_value,
247 "mean_absolute_deviation": result.mean_absolute_deviation
248 });
249
250 println!("{}", serde_json::to_string_pretty(&output).unwrap());
251}
252
253fn print_csv_output(result: &BenfordResult) {
254 println!("dataset,numbers_analyzed,risk_level,chi_square,p_value,mad");
255 println!(
256 "{},{},{:?},{:.6},{:.6},{:.2}",
257 result.dataset_name,
258 result.numbers_analyzed,
259 result.risk_level,
260 result.chi_square,
261 result.p_value,
262 result.mean_absolute_deviation
263 );
264}
265
266fn print_yaml_output(result: &BenfordResult) {
267 println!("dataset: \"{}\"", result.dataset_name);
268 println!("numbers_analyzed: {}", result.numbers_analyzed);
269 println!("risk_level: \"{:?}\"", result.risk_level);
270 println!("chi_square: {:.6}", result.chi_square);
271 println!("p_value: {:.6}", result.p_value);
272 println!("mad: {:.2}", result.mean_absolute_deviation);
273}
274
275fn print_toml_output(result: &BenfordResult) {
276 println!("dataset = \"{}\"", result.dataset_name);
277 println!("numbers_analyzed = {}", result.numbers_analyzed);
278 println!("risk_level = \"{:?}\"", result.risk_level);
279 println!("chi_square = {:.6}", result.chi_square);
280 println!("p_value = {:.6}", result.p_value);
281 println!("mad = {:.2}", result.mean_absolute_deviation);
282}
283
284fn print_xml_output(result: &BenfordResult) {
285 println!("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
286 println!("<benford_analysis>");
287 println!(" <dataset>{}</dataset>", result.dataset_name);
288 println!(
289 " <numbers_analyzed>{}</numbers_analyzed>",
290 result.numbers_analyzed
291 );
292 println!(" <risk_level>{:?}</risk_level>", result.risk_level);
293 println!(" <chi_square>{:.6}</chi_square>", result.chi_square);
294 println!(" <p_value>{:.6}</p_value>", result.p_value);
295 println!(" <mad>{:.2}</mad>", result.mean_absolute_deviation);
296 println!("</benford_analysis>");
297}
298
299fn analyze_numbers_with_options(
301 matches: &clap::ArgMatches,
302 dataset_name: String,
303 numbers: &[f64],
304) -> Result<BenfordResult> {
305 let filtered_numbers = if let Some(filter_str) = matches.get_one::<String>("filter") {
307 let filter = NumberFilter::parse(filter_str)
308 .map_err(|e| BenfError::ParseError(format!("無効なフィルタ: {e}")))?;
309
310 let filtered = apply_number_filter(numbers, &filter);
311
312 if filtered.len() != numbers.len() {
314 eprintln!(
315 "フィルタリング結果: {} 個の数値が {} 個に絞り込まれました ({})",
316 numbers.len(),
317 filtered.len(),
318 filter.description()
319 );
320 }
321
322 filtered
323 } else {
324 numbers.to_vec()
325 };
326
327 let threshold = if let Some(threshold_str) = matches.get_one::<String>("threshold") {
329 if threshold_str == "auto" {
330 RiskThreshold::Auto
331 } else {
332 RiskThreshold::from_str(threshold_str)
333 .map_err(|e| BenfError::ParseError(format!("無効な閾値: {e}")))?
334 }
335 } else {
336 RiskThreshold::Auto
337 };
338
339 let min_count = if let Some(min_count_str) = matches.get_one::<String>("min-count") {
341 min_count_str
342 .parse::<usize>()
343 .map_err(|_| BenfError::ParseError("無効な最小数値数".to_string()))?
344 } else {
345 5
346 };
347
348 let _confidence = if let Some(confidence_str) = matches.get_one::<String>("confidence") {
350 let conf = confidence_str
351 .parse::<f64>()
352 .map_err(|_| BenfError::ParseError("無効な信頼度レベル".to_string()))?;
353 if !(0.01..=0.99).contains(&conf) {
354 return Err(BenfError::ParseError(
355 "信頼度レベルは0.01から0.99の間である必要があります".to_string(),
356 ));
357 }
358 conf
359 } else {
360 0.95
361 };
362
363 let mut working_numbers = filtered_numbers.clone();
365 if let Some(sample_size_str) = matches.get_one::<String>("sample-size") {
366 let max_size = sample_size_str
367 .parse::<usize>()
368 .map_err(|_| BenfError::ParseError("無効なサンプルサイズ".to_string()))?;
369
370 if working_numbers.len() > max_size {
371 eprintln!(
372 "大規模データセット: {}個の数値を{}個にサンプリングしました",
373 working_numbers.len(),
374 max_size
375 );
376 let step = working_numbers.len() / max_size;
378 working_numbers = working_numbers
379 .iter()
380 .step_by(step.max(1))
381 .cloned()
382 .take(max_size)
383 .collect();
384 }
385 }
386
387 if let Some(min_value_str) = matches.get_one::<String>("min-value") {
389 let min_val = min_value_str
390 .parse::<f64>()
391 .map_err(|_| BenfError::ParseError("無効な最小値".to_string()))?;
392
393 let original_len = working_numbers.len();
394 working_numbers.retain(|&x| x >= min_val);
395
396 if working_numbers.len() != original_len {
397 if matches.get_flag("verbose") {
398 eprintln!(
399 "Debug: Min-value filter applied: {} → {} numbers (>= {})",
400 original_len,
401 working_numbers.len(),
402 min_val
403 );
404 eprintln!(
405 "Debug: Filter removed {} values ({:.1}%)",
406 original_len - working_numbers.len(),
407 100.0 * (original_len - working_numbers.len()) as f64 / original_len as f64
408 );
409 } else {
410 eprintln!(
411 "最小値フィルタ適用: {}個の数値が{}個に絞り込まれました (>= {})",
412 original_len,
413 working_numbers.len(),
414 min_val
415 );
416 }
417 }
418 }
419
420 BenfordResult::new_with_threshold(dataset_name, &working_numbers, &threshold, min_count)
422}
423
424fn convert_incremental_to_result(
426 incremental: &lawkit_core::common::memory::IncrementalBenford,
427 dataset_name: String,
428 _matches: &clap::ArgMatches,
429) -> BenfordResult {
430 use lawkit_core::common::statistics;
431
432 let digit_distribution = incremental.get_distribution();
434 let expected_distribution = [
435 30.103, 17.609, 12.494, 9.691, 7.918, 6.695, 5.799, 5.115, 4.576,
436 ];
437
438 let chi_square = statistics::calculate_chi_square(&digit_distribution, &expected_distribution);
440 let p_value = statistics::calculate_p_value(chi_square, 8);
441 let mean_absolute_deviation = incremental.calculate_mad();
442
443 let risk_level = determine_risk_level(mean_absolute_deviation, p_value);
445
446 let verdict = format!("Risk Level: {risk_level:?}");
448
449 BenfordResult {
450 dataset_name,
451 numbers_analyzed: incremental.total_count(),
452 digit_distribution,
453 expected_distribution,
454 chi_square,
455 p_value,
456 mean_absolute_deviation,
457 risk_level,
458 verdict,
459 }
460}
461
462fn determine_risk_level(mad: f64, p_value: f64) -> RiskLevel {
464 if mad > 15.0 || p_value < 0.01 {
465 RiskLevel::Critical
466 } else if mad > 10.0 || p_value < 0.05 {
467 RiskLevel::High
468 } else if mad > 5.0 || p_value < 0.10 {
469 RiskLevel::Medium
470 } else {
471 RiskLevel::Low
472 }
473}
474
475fn format_distribution_bars(result: &BenfordResult) -> String {
476 let mut output = String::new();
477 const CHART_WIDTH: usize = 50;
478
479 for i in 0..9 {
480 let digit = i + 1;
481 let observed = result.digit_distribution[i];
482 let expected = result.expected_distribution[i];
483 let bar_length = ((observed / 100.0) * CHART_WIDTH as f64).round() as usize;
484 let bar_length = bar_length.min(CHART_WIDTH); let expected_line_pos = ((expected / 100.0) * CHART_WIDTH as f64).round() as usize;
488 let expected_line_pos = expected_line_pos.min(CHART_WIDTH - 1); let mut bar_chars = Vec::new();
492 for pos in 0..CHART_WIDTH {
493 if pos == expected_line_pos {
494 bar_chars.push('┃'); } else if pos < bar_length {
496 bar_chars.push('█'); } else {
498 bar_chars.push('░'); }
500 }
501 let full_bar: String = bar_chars.iter().collect();
502
503 output.push_str(&format!(
504 "{digit:1}: {full_bar} {observed:>5.1}% (expected: {expected:>5.1}%)\n"
505 ));
506 }
507
508 output
509}