lawkit_python/subcommands/
benf.rs1use crate::colors;
2use clap::ArgMatches;
3use lawkit_core::{
4 common::{
5 filtering::{apply_number_filter, NumberFilter, RiskThreshold},
6 input::{parse_input_auto, parse_text_input},
7 memory::{streaming_benford_analysis, MemoryConfig},
8 risk::RiskLevel,
9 streaming_io::OptimizedFileReader,
10 },
11 error::{BenfError, Result},
12 laws::benford::BenfordResult,
13};
14use std::str::FromStr;
15
16pub fn run(matches: &ArgMatches) -> Result<()> {
17 if matches.get_flag("verbose") {
19 eprintln!(
20 "Debug: input argument = {:?}",
21 matches.get_one::<String>("input")
22 );
23 }
24
25 if let Some(input) = matches.get_one::<String>("input") {
26 match parse_input_auto(input) {
28 Ok(numbers) => {
29 if numbers.is_empty() {
30 eprintln!("Error: No valid numbers found in input");
31 std::process::exit(1);
32 }
33
34 let result =
36 match analyze_numbers_with_options(matches, input.to_string(), &numbers) {
37 Ok(result) => result,
38 Err(e) => {
39 eprintln!("Analysis error: {e}");
40 std::process::exit(1);
41 }
42 };
43
44 output_results(matches, &result);
46 std::process::exit(result.risk_level.exit_code());
47 }
48 Err(e) => {
49 eprintln!("Error processing input '{input}': {e}");
50 std::process::exit(1);
51 }
52 }
53 } else {
54 if matches.get_flag("verbose") {
56 eprintln!("Debug: Reading from stdin, using automatic optimization");
57 }
58
59 let mut reader = OptimizedFileReader::from_stdin();
61
62 if matches.get_flag("verbose") {
63 eprintln!(
64 "Debug: Using automatic optimization (streaming + incremental + memory efficiency)"
65 );
66 }
67
68 let numbers = match reader.read_lines_streaming(|line| {
70 if matches.get_flag("verbose") {
71 eprintln!("Debug: Processing line: '{line}'");
72 }
73 parse_text_input(&line).map(Some).or(Ok(None))
74 }) {
75 Ok(nested_numbers) => {
76 let flattened: Vec<f64> = nested_numbers.into_iter().flatten().collect();
77 if matches.get_flag("verbose") {
78 eprintln!("Debug: Collected {} numbers from stream", flattened.len());
79 }
80 flattened
81 }
82 Err(e) => {
83 eprintln!("Analysis error: {e}");
84 std::process::exit(1);
85 }
86 };
87
88 let filtered_numbers = if let Some(min_value_str) = matches.get_one::<String>("min-value") {
90 let min_val = min_value_str
91 .parse::<f64>()
92 .map_err(|_| {
93 eprintln!("Error: Invalid minimum value");
94 std::process::exit(2);
95 })
96 .unwrap();
97
98 let original_len = numbers.len();
99 let filtered: Vec<f64> = numbers.into_iter().filter(|&x| x >= min_val).collect();
100
101 if matches.get_flag("verbose") {
102 eprintln!(
103 "Debug: Min-value filter applied: {} → {} numbers (>= {})",
104 original_len,
105 filtered.len(),
106 min_val
107 );
108 eprintln!(
109 "Debug: Filter removed {} values ({:.1}%)",
110 original_len - filtered.len(),
111 100.0 * (original_len - filtered.len()) as f64 / original_len as f64
112 );
113 }
114 filtered
115 } else {
116 numbers
117 };
118
119 let memory_config = MemoryConfig::default();
121
122 let chunk_result =
124 match streaming_benford_analysis(filtered_numbers.into_iter(), &memory_config) {
125 Ok(result) => {
126 if matches.get_flag("verbose") {
127 eprintln!(
128 "Debug: Streaming analysis successful - {} items processed",
129 result.total_items
130 );
131 }
132 result
133 }
134 Err(e) => {
135 eprintln!("Streaming analysis error: {e}");
136 std::process::exit(1);
137 }
138 };
139
140 if chunk_result.total_items == 0 {
141 if matches.get_flag("verbose") {
142 eprintln!(
143 "Debug: Total items in chunk_result: {}",
144 chunk_result.total_items
145 );
146 }
147 eprintln!("Error: No valid numbers found in input");
148 std::process::exit(1);
149 }
150
151 let benford_result =
153 convert_incremental_to_result(&chunk_result.result, "stdin".to_string(), matches);
154
155 if matches.get_flag("verbose") {
157 eprintln!(
158 "Debug: Processed {} numbers in {} chunks",
159 chunk_result.total_items, chunk_result.chunks_processed
160 );
161 eprintln!("Debug: Memory used: {:.2} MB", chunk_result.memory_used_mb);
162 eprintln!(
163 "Debug: Processing time: {} ms",
164 chunk_result.processing_time_ms
165 );
166 }
167
168 output_results(matches, &benford_result);
170 std::process::exit(benford_result.risk_level.exit_code());
171 }
172}
173
174fn output_results(matches: &clap::ArgMatches, result: &BenfordResult) {
175 let format = matches.get_one::<String>("format").unwrap();
176 let quiet = matches.get_flag("quiet");
177 let verbose = matches.get_flag("verbose");
178 let no_color = matches.get_flag("no-color");
179
180 match format.as_str() {
181 "text" => print_text_output(result, quiet, verbose, no_color),
182 "json" => print_json_output(result),
183 "csv" => print_csv_output(result),
184 "yaml" => print_yaml_output(result),
185 "toml" => print_toml_output(result),
186 "xml" => print_xml_output(result),
187 _ => {
188 eprintln!("Error: Unsupported output format: {format}");
189 std::process::exit(2);
190 }
191 }
192}
193
194fn print_text_output(result: &BenfordResult, quiet: bool, verbose: bool, no_color: bool) {
195 if quiet {
196 for (i, &observed) in result.digit_distribution.iter().enumerate() {
197 println!("{}: {:.1}%", i + 1, observed);
198 }
199 return;
200 }
201
202 println!("Benford Law Analysis Results");
203 println!();
204 println!("Dataset: {}", result.dataset_name);
205 println!("Numbers analyzed: {}", result.numbers_analyzed);
206 match result.risk_level {
207 RiskLevel::Critical => println!("{}", colors::level_critical("Dataset analysis", no_color)),
208 RiskLevel::High => println!("{}", colors::level_high("Dataset analysis", no_color)),
209 RiskLevel::Medium => println!("{}", colors::level_medium("Dataset analysis", no_color)),
210 RiskLevel::Low => println!("{}", colors::level_low("Dataset analysis", no_color)),
211 }
212
213 println!();
214 println!("First Digit Distribution:");
215 println!("{}", format_distribution_bars(result));
216
217 if verbose {
218 println!();
219 println!("First Digit Distribution:");
220 for (i, &observed) in result.digit_distribution.iter().enumerate() {
221 let digit = i + 1;
222 let expected = result.expected_distribution[i];
223 let deviation = observed - expected;
224
225 println!(
226 "{digit}: {observed:.1}% (expected: {expected:.1}%, deviation: {deviation:+.1}%)"
227 );
228 }
229
230 println!();
231 println!("Statistical Tests:");
232 println!(
233 "Chi-square: {:.2} (p-value: {:.6})",
234 result.chi_square, result.p_value
235 );
236 }
237}
238
239fn print_json_output(result: &BenfordResult) {
240 use serde_json::json;
241
242 let output = json!({
243 "dataset": result.dataset_name,
244 "numbers_analyzed": result.numbers_analyzed,
245 "risk_level": format!("{:?}", result.risk_level),
246 "chi_square": result.chi_square,
247 "p_value": result.p_value,
248 "mean_absolute_deviation": result.mean_absolute_deviation
249 });
250
251 println!("{}", serde_json::to_string_pretty(&output).unwrap());
252}
253
254fn print_csv_output(result: &BenfordResult) {
255 println!("dataset,numbers_analyzed,risk_level,chi_square,p_value,mad");
256 println!(
257 "{},{},{:?},{:.6},{:.6},{:.2}",
258 result.dataset_name,
259 result.numbers_analyzed,
260 result.risk_level,
261 result.chi_square,
262 result.p_value,
263 result.mean_absolute_deviation
264 );
265}
266
267fn print_yaml_output(result: &BenfordResult) {
268 println!("dataset: \"{}\"", result.dataset_name);
269 println!("numbers_analyzed: {}", result.numbers_analyzed);
270 println!("risk_level: \"{:?}\"", result.risk_level);
271 println!("chi_square: {:.6}", result.chi_square);
272 println!("p_value: {:.6}", result.p_value);
273 println!("mad: {:.2}", result.mean_absolute_deviation);
274}
275
276fn print_toml_output(result: &BenfordResult) {
277 println!("dataset = \"{}\"", result.dataset_name);
278 println!("numbers_analyzed = {}", result.numbers_analyzed);
279 println!("risk_level = \"{:?}\"", result.risk_level);
280 println!("chi_square = {:.6}", result.chi_square);
281 println!("p_value = {:.6}", result.p_value);
282 println!("mad = {:.2}", result.mean_absolute_deviation);
283}
284
285fn print_xml_output(result: &BenfordResult) {
286 println!("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
287 println!("<benford_analysis>");
288 println!(" <dataset>{}</dataset>", result.dataset_name);
289 println!(
290 " <numbers_analyzed>{}</numbers_analyzed>",
291 result.numbers_analyzed
292 );
293 println!(" <risk_level>{:?}</risk_level>", result.risk_level);
294 println!(" <chi_square>{:.6}</chi_square>", result.chi_square);
295 println!(" <p_value>{:.6}</p_value>", result.p_value);
296 println!(" <mad>{:.2}</mad>", result.mean_absolute_deviation);
297 println!("</benford_analysis>");
298}
299
300fn analyze_numbers_with_options(
302 matches: &clap::ArgMatches,
303 dataset_name: String,
304 numbers: &[f64],
305) -> Result<BenfordResult> {
306 let filtered_numbers = if let Some(filter_str) = matches.get_one::<String>("filter") {
308 let filter = NumberFilter::parse(filter_str)
309 .map_err(|e| BenfError::ParseError(format!("無効なフィルタ: {e}")))?;
310
311 let filtered = apply_number_filter(numbers, &filter);
312
313 if filtered.len() != numbers.len() {
315 eprintln!(
316 "フィルタリング結果: {} 個の数値が {} 個に絞り込まれました ({})",
317 numbers.len(),
318 filtered.len(),
319 filter.description()
320 );
321 }
322
323 filtered
324 } else {
325 numbers.to_vec()
326 };
327
328 let threshold = if let Some(threshold_str) = matches.get_one::<String>("threshold") {
330 if threshold_str == "auto" {
331 RiskThreshold::Auto
332 } else {
333 RiskThreshold::from_str(threshold_str)
334 .map_err(|e| BenfError::ParseError(format!("無効な閾値: {e}")))?
335 }
336 } else {
337 RiskThreshold::Auto
338 };
339
340 let min_count = if let Some(min_count_str) = matches.get_one::<String>("min-count") {
342 min_count_str
343 .parse::<usize>()
344 .map_err(|_| BenfError::ParseError("無効な最小数値数".to_string()))?
345 } else {
346 5
347 };
348
349 let _confidence = if let Some(confidence_str) = matches.get_one::<String>("confidence") {
351 let conf = confidence_str
352 .parse::<f64>()
353 .map_err(|_| BenfError::ParseError("無効な信頼度レベル".to_string()))?;
354 if !(0.01..=0.99).contains(&conf) {
355 return Err(BenfError::ParseError(
356 "信頼度レベルは0.01から0.99の間である必要があります".to_string(),
357 ));
358 }
359 conf
360 } else {
361 0.95
362 };
363
364 let mut working_numbers = filtered_numbers.clone();
366 if let Some(sample_size_str) = matches.get_one::<String>("sample-size") {
367 let max_size = sample_size_str
368 .parse::<usize>()
369 .map_err(|_| BenfError::ParseError("無効なサンプルサイズ".to_string()))?;
370
371 if working_numbers.len() > max_size {
372 eprintln!(
373 "大規模データセット: {}個の数値を{}個にサンプリングしました",
374 working_numbers.len(),
375 max_size
376 );
377 let step = working_numbers.len() / max_size;
379 working_numbers = working_numbers
380 .iter()
381 .step_by(step.max(1))
382 .cloned()
383 .take(max_size)
384 .collect();
385 }
386 }
387
388 if let Some(min_value_str) = matches.get_one::<String>("min-value") {
390 let min_val = min_value_str
391 .parse::<f64>()
392 .map_err(|_| BenfError::ParseError("無効な最小値".to_string()))?;
393
394 let original_len = working_numbers.len();
395 working_numbers.retain(|&x| x >= min_val);
396
397 if working_numbers.len() != original_len {
398 if matches.get_flag("verbose") {
399 eprintln!(
400 "Debug: Min-value filter applied: {} → {} numbers (>= {})",
401 original_len,
402 working_numbers.len(),
403 min_val
404 );
405 eprintln!(
406 "Debug: Filter removed {} values ({:.1}%)",
407 original_len - working_numbers.len(),
408 100.0 * (original_len - working_numbers.len()) as f64 / original_len as f64
409 );
410 } else {
411 eprintln!(
412 "最小値フィルタ適用: {}個の数値が{}個に絞り込まれました (>= {})",
413 original_len,
414 working_numbers.len(),
415 min_val
416 );
417 }
418 }
419 }
420
421 BenfordResult::new_with_threshold(dataset_name, &working_numbers, &threshold, min_count)
423}
424
425fn convert_incremental_to_result(
427 incremental: &lawkit_core::common::memory::IncrementalBenford,
428 dataset_name: String,
429 _matches: &clap::ArgMatches,
430) -> BenfordResult {
431 use lawkit_core::common::statistics;
432
433 let digit_distribution = incremental.get_distribution();
435 let expected_distribution = [
436 30.103, 17.609, 12.494, 9.691, 7.918, 6.695, 5.799, 5.115, 4.576,
437 ];
438
439 let chi_square = statistics::calculate_chi_square(&digit_distribution, &expected_distribution);
441 let p_value = statistics::calculate_p_value(chi_square, 8);
442 let mean_absolute_deviation = incremental.calculate_mad();
443
444 let risk_level = determine_risk_level(mean_absolute_deviation, p_value);
446
447 let verdict = format!("Risk Level: {risk_level:?}");
449
450 BenfordResult {
451 dataset_name,
452 numbers_analyzed: incremental.total_count(),
453 digit_distribution,
454 expected_distribution,
455 chi_square,
456 p_value,
457 mean_absolute_deviation,
458 risk_level,
459 verdict,
460 }
461}
462
463fn determine_risk_level(mad: f64, p_value: f64) -> RiskLevel {
465 if mad > 15.0 || p_value < 0.01 {
466 RiskLevel::Critical
467 } else if mad > 10.0 || p_value < 0.05 {
468 RiskLevel::High
469 } else if mad > 5.0 || p_value < 0.10 {
470 RiskLevel::Medium
471 } else {
472 RiskLevel::Low
473 }
474}
475
476fn format_distribution_bars(result: &BenfordResult) -> String {
477 let mut output = String::new();
478 const CHART_WIDTH: usize = 50;
479
480 for i in 0..9 {
481 let digit = i + 1;
482 let observed = result.digit_distribution[i];
483 let expected = result.expected_distribution[i];
484 let bar_length = ((observed / 100.0) * CHART_WIDTH as f64).round() as usize;
485 let bar_length = bar_length.min(CHART_WIDTH); let expected_line_pos = ((expected / 100.0) * CHART_WIDTH as f64).round() as usize;
489 let expected_line_pos = expected_line_pos.min(CHART_WIDTH - 1); let mut bar_chars = Vec::new();
493 for pos in 0..CHART_WIDTH {
494 if pos == expected_line_pos {
495 bar_chars.push('┃'); } else if pos < bar_length {
497 bar_chars.push('█'); } else {
499 bar_chars.push('░'); }
501 }
502 let full_bar: String = bar_chars.iter().collect();
503
504 output.push_str(&format!(
505 "{digit:1}: {full_bar} {observed:>5.1}% (expected: {expected:>5.1}%)\n"
506 ));
507 }
508
509 output
510}