lawkit_python/subcommands/
normal.rs

1use crate::colors;
2use crate::common_options::{get_optimized_reader, setup_automatic_optimization_config};
3use clap::ArgMatches;
4use lawkit_core::{
5    common::{
6        filtering::{apply_number_filter, NumberFilter},
7        input::{parse_input_auto, parse_text_input},
8        memory::{streaming_normal_analysis, MemoryConfig},
9        outliers::{
10            detect_outliers_dbscan, detect_outliers_ensemble, detect_outliers_isolation,
11            detect_outliers_lof, AdvancedOutlierResult,
12        },
13        streaming_io::OptimizedFileReader,
14        timeseries::{analyze_timeseries, create_timeseries_from_values, TimeSeriesAnalysis},
15    },
16    error::{BenfError, Result},
17    laws::normal::{
18        analyze_normal_distribution, detect_outliers, quality_control_analysis, test_normality,
19        NormalResult, NormalityTest, NormalityTestResult, OutlierDetectionMethod,
20        OutlierDetectionResult, ProcessCapability, QualityControlResult,
21    },
22};
23
24pub fn run(matches: &ArgMatches) -> Result<()> {
25    // 自動最適化設定をセットアップ
26    let (_parallel_config, _memory_config) = setup_automatic_optimization_config();
27
28    // 特殊モードの確認（フラグベースのモードを優先）
29    if matches.get_flag("outliers") {
30        return run_outlier_detection_mode(matches);
31    }
32
33    if matches.get_flag("quality-control") {
34        return run_quality_control_mode(matches);
35    }
36
37    if matches.get_flag("enable-timeseries") {
38        return run_timeseries_analysis_mode(matches);
39    }
40
41    // testパラメータが明示的に指定されている場合のみテストモード
42    if let Some(test_type) = matches.get_one::<String>("test") {
43        if test_type != "all" {
44            // "all"はデフォルトなので通常分析モードで処理
45            return run_normality_test_mode(matches, test_type);
46        }
47    }
48
49    // Determine input source based on arguments
50    if matches.get_flag("verbose") {
51        eprintln!(
52            "Debug: input argument = {:?}",
53            matches.get_one::<String>("input")
54        );
55    }
56
57    // 入力データ処理
58    let numbers = if let Some(input) = matches.get_one::<String>("input") {
59        // ファイル入力の場合
60        match parse_input_auto(input) {
61            Ok(numbers) => {
62                if numbers.is_empty() {
63                    eprintln!("Error: No valid numbers found in input");
64                    std::process::exit(1);
65                }
66                numbers
67            }
68            Err(e) => {
69                eprintln!("Error processing input '{input}': {e}");
70                std::process::exit(1);
71            }
72        }
73    } else {
74        // stdin入力の場合：ストリーミング処理を使用
75        if matches.get_flag("verbose") {
76            eprintln!("Debug: Reading from stdin, using automatic optimization");
77        }
78
79        let mut reader = OptimizedFileReader::from_stdin();
80
81        if matches.get_flag("verbose") {
82            eprintln!(
83                "Debug: Using automatic optimization (streaming + incremental + memory efficiency)"
84            );
85        }
86
87        let numbers = match reader
88            .read_lines_streaming(|line: String| parse_text_input(&line).map(Some).or(Ok(None)))
89        {
90            Ok(nested_numbers) => {
91                let flattened: Vec<f64> = nested_numbers.into_iter().flatten().collect();
92                if matches.get_flag("verbose") {
93                    eprintln!("Debug: Collected {} numbers from stream", flattened.len());
94                }
95                flattened
96            }
97            Err(e) => {
98                eprintln!("Analysis error: {e}");
99                std::process::exit(1);
100            }
101        };
102
103        if numbers.is_empty() {
104            eprintln!("Error: No valid numbers found in input");
105            std::process::exit(1);
106        }
107
108        // インクリメンタルストリーミング分析を実行（より詳細な統計が必要な場合）
109        if numbers.len() > 10000 {
110            let memory_config = MemoryConfig::default();
111            let chunk_result = match streaming_normal_analysis(numbers.into_iter(), &memory_config)
112            {
113                Ok(result) => {
114                    if matches.get_flag("verbose") {
115                        eprintln!(
116                            "Debug: Streaming analysis successful - {} items processed",
117                            result.total_items
118                        );
119                    }
120                    result
121                }
122                Err(e) => {
123                    eprintln!("Streaming analysis error: {e}");
124                    std::process::exit(1);
125                }
126            };
127
128            if matches.get_flag("verbose") {
129                eprintln!(
130                    "Debug: Processed {} numbers in {} chunks",
131                    chunk_result.total_items, chunk_result.chunks_processed
132                );
133                eprintln!("Debug: Memory used: {:.2} MB", chunk_result.memory_used_mb);
134            }
135
136            chunk_result.result.values().to_vec()
137        } else {
138            if matches.get_flag("verbose") {
139                eprintln!("Debug: Memory used: 0.00 MB");
140            }
141            numbers
142        }
143    };
144
145    let dataset_name = matches
146        .get_one::<String>("input")
147        .map(|s| s.to_string())
148        .unwrap_or_else(|| "stdin".to_string());
149
150    let result = match analyze_numbers_with_options(matches, dataset_name, &numbers) {
151        Ok(result) => result,
152        Err(e) => {
153            eprintln!("Analysis error: {e}");
154            std::process::exit(1);
155        }
156    };
157
158    output_results(matches, &result);
159    std::process::exit(result.risk_level.exit_code())
160}
161
162fn run_normality_test_mode(matches: &ArgMatches, test_type: &str) -> Result<()> {
163    let numbers = get_numbers_from_input(matches)?;
164
165    let test = match test_type {
166        "shapiro" => NormalityTest::ShapiroWilk,
167        "anderson" => NormalityTest::AndersonDarling,
168        "ks" => NormalityTest::KolmogorovSmirnov,
169        "all" => NormalityTest::All,
170        _ => {
171            eprintln!(
172                "Error: Unknown test type '{test_type}'. Available: shapiro, anderson, ks, all"
173            );
174            std::process::exit(2);
175        }
176    };
177
178    let test_result = test_normality(&numbers, test)?;
179    output_normality_test_result(matches, &test_result);
180
181    let exit_code = if test_result.is_normal { 0 } else { 1 };
182    std::process::exit(exit_code);
183}
184
185fn run_outlier_detection_mode(matches: &ArgMatches) -> Result<()> {
186    let numbers = get_numbers_from_input(matches)?;
187
188    let method_str = matches
189        .get_one::<String>("outlier-method")
190        .map(|s| s.as_str())
191        .unwrap_or("zscore");
192
193    // 高度な異常値検出手法の処理
194    match method_str {
195        "lof" => {
196            let result = detect_outliers_lof(&numbers, 5)?;
197            output_advanced_outlier_result(matches, &result);
198            let exit_code = if result.outliers.is_empty() { 0 } else { 10 };
199            std::process::exit(exit_code);
200        }
201        "isolation" => {
202            let result = detect_outliers_isolation(&numbers, 8)?;
203            output_advanced_outlier_result(matches, &result);
204            let exit_code = if result.outliers.is_empty() { 0 } else { 10 };
205            std::process::exit(exit_code);
206        }
207        "dbscan" => {
208            let std_dev = calculate_std_dev(&numbers);
209            let eps = std_dev * 0.5;
210            let min_pts = (numbers.len() as f64).sqrt() as usize;
211            let result = detect_outliers_dbscan(&numbers, eps, min_pts)?;
212            output_advanced_outlier_result(matches, &result);
213            let exit_code = if result.outliers.is_empty() { 0 } else { 10 };
214            std::process::exit(exit_code);
215        }
216        "ensemble" => {
217            let result = detect_outliers_ensemble(&numbers)?;
218            output_advanced_outlier_result(matches, &result);
219            let exit_code = if result.outliers.is_empty() { 0 } else { 10 };
220            std::process::exit(exit_code);
221        }
222        _ => {
223            // 既存の異常値検出手法
224            let method = match method_str {
225                "zscore" => OutlierDetectionMethod::ZScore,
226                "modified" | "modified_zscore" => OutlierDetectionMethod::ModifiedZScore,
227                "iqr" => OutlierDetectionMethod::IQR,
228                _ => {
229                    eprintln!(
230                        "Error: Unknown outlier detection method '{method_str}'. Available: zscore, modified_zscore, iqr, lof, isolation, dbscan, ensemble"
231                    );
232                    std::process::exit(2);
233                }
234            };
235
236            let outlier_result = detect_outliers(&numbers, method)?;
237            output_outlier_detection_result(matches, &outlier_result);
238
239            let exit_code = if outlier_result.outliers.is_empty() {
240                0
241            } else {
242                1
243            };
244            std::process::exit(exit_code);
245        }
246    }
247}
248
249fn run_timeseries_analysis_mode(matches: &ArgMatches) -> Result<()> {
250    let numbers = get_numbers_from_input(matches)?;
251
252    // 数値データを時系列データに変換
253    let timeseries_data = create_timeseries_from_values(&numbers);
254
255    // 時系列分析を実行
256    let analysis_result = analyze_timeseries(&timeseries_data)?;
257
258    // 結果を出力
259    output_timeseries_result(matches, &analysis_result);
260
261    std::process::exit(0);
262}
263
264fn run_quality_control_mode(matches: &ArgMatches) -> Result<()> {
265    let numbers = get_numbers_from_input(matches)?;
266
267    let spec_limits = if let Some(limits_str) = matches.get_one::<String>("spec-limits") {
268        parse_spec_limits(limits_str)?
269    } else {
270        None
271    };
272
273    let qc_result = quality_control_analysis(&numbers, spec_limits)?;
274    output_quality_control_result(matches, &qc_result);
275
276    let exit_code = match &qc_result.process_capability {
277        Some(cap) => match cap {
278            ProcessCapability::Excellent => 0,
279            ProcessCapability::Adequate => 1,
280            ProcessCapability::Poor => 2,
281            ProcessCapability::Inadequate => 3,
282        },
283        None => 0,
284    };
285    std::process::exit(exit_code);
286}
287
288fn get_numbers_from_input(matches: &ArgMatches) -> Result<Vec<f64>> {
289    let (_parallel_config, _memory_config) = setup_automatic_optimization_config();
290
291    let buffer = if let Some(input) = matches.get_one::<String>("input") {
292        if input == "-" {
293            get_optimized_reader(None)
294        } else {
295            get_optimized_reader(Some(input))
296        }
297    } else {
298        get_optimized_reader(None)
299    };
300
301    let data = buffer.map_err(|e| BenfError::ParseError(e.to_string()))?;
302    parse_text_input(&data)
303}
304
305fn parse_spec_limits(limits_str: &str) -> Result<Option<(f64, f64)>> {
306    let parts: Vec<&str> = limits_str.split(',').collect();
307    if parts.len() != 2 {
308        return Err(BenfError::ParseError(
309            "Spec limits must be in format 'lower,upper'".to_string(),
310        ));
311    }
312
313    let lower = parts[0]
314        .trim()
315        .parse::<f64>()
316        .map_err(|_| BenfError::ParseError("Invalid lower spec limit".to_string()))?;
317    let upper = parts[1]
318        .trim()
319        .parse::<f64>()
320        .map_err(|_| BenfError::ParseError("Invalid upper spec limit".to_string()))?;
321
322    if lower >= upper {
323        return Err(BenfError::ParseError(
324            "Lower spec limit must be less than upper spec limit".to_string(),
325        ));
326    }
327
328    Ok(Some((lower, upper)))
329}
330
331fn output_results(matches: &clap::ArgMatches, result: &NormalResult) {
332    let format = matches.get_one::<String>("format").unwrap();
333    let quiet = matches.get_flag("quiet");
334    let verbose = matches.get_flag("verbose");
335
336    match format.as_str() {
337        "text" => print_text_output(result, quiet, verbose),
338        "json" => print_json_output(result),
339        "csv" => print_csv_output(result),
340        "yaml" => print_yaml_output(result),
341        "toml" => print_toml_output(result),
342        "xml" => print_xml_output(result),
343        _ => {
344            eprintln!("Error: Unsupported output format: {format}");
345            std::process::exit(2);
346        }
347    }
348}
349
350fn output_normality_test_result(matches: &clap::ArgMatches, result: &NormalityTestResult) {
351    let format_str = matches
352        .get_one::<String>("format")
353        .map(|s| s.as_str())
354        .unwrap_or("text");
355
356    match format_str {
357        "text" => {
358            println!("Test: {}", result.test_name);
359            println!("Statistic: {:.6}", result.statistic);
360            println!("P-value: {:.6}", result.p_value);
361            println!("Is Normal: {}", if result.is_normal { "Yes" } else { "No" });
362        }
363        "json" => {
364            use serde_json::json;
365            let output = json!({
366                "test_name": result.test_name,
367                "statistic": result.statistic,
368                "p_value": result.p_value,
369                "critical_value": result.critical_value,
370                "is_normal": result.is_normal
371            });
372            println!("{}", serde_json::to_string_pretty(&output).unwrap());
373        }
374        _ => print_text_output(
375            &NormalResult::new("test".to_string(), &[0.0; 10]).unwrap(),
376            false,
377            false,
378        ),
379    }
380}
381
382fn output_outlier_detection_result(matches: &clap::ArgMatches, result: &OutlierDetectionResult) {
383    let format_str = matches
384        .get_one::<String>("format")
385        .map(|s| s.as_str())
386        .unwrap_or("text");
387
388    match format_str {
389        "text" => {
390            println!("Method: {}", result.method_name);
391            println!("Outliers found: {}", result.outliers.len());
392
393            if !result.outliers.is_empty() {
394                println!("\nOutlier Details:");
395                for outlier in &result.outliers {
396                    println!("  Index: {} (Value: {:.3})", outlier.index, outlier.value);
397                }
398            }
399        }
400        "json" => {
401            use serde_json::json;
402            let output = json!({
403                "method_name": result.method_name,
404                "threshold": result.threshold,
405                "outliers_count": result.outliers.len(),
406                "outliers": result.outliers.iter().map(|o| json!({
407                    "index": o.index,
408                    "value": o.value,
409                    "score": o.score,
410                    "is_outlier": o.is_outlier
411                })).collect::<Vec<_>>()
412            });
413            println!("{}", serde_json::to_string_pretty(&output).unwrap());
414        }
415        _ => println!("Unsupported format for outlier detection"),
416    }
417}
418
419fn output_quality_control_result(matches: &clap::ArgMatches, result: &QualityControlResult) {
420    let format_str = matches
421        .get_one::<String>("format")
422        .map(|s| s.as_str())
423        .unwrap_or("text");
424
425    match format_str {
426        "text" => {
427            println!("Quality Control Analysis");
428            println!("Mean: {:.3}", result.mean);
429            println!("Standard Deviation: {:.3}", result.std_dev);
430
431            if let (Some(cp), Some(cpk)) = (result.cp, result.cpk) {
432                println!("Cp: {cp:.3}");
433                println!("Cpk: {cpk:.3}");
434
435                if let Some(ref capability) = result.process_capability {
436                    let cap_text = match capability {
437                        ProcessCapability::Excellent => "Excellent",
438                        ProcessCapability::Adequate => "Adequate",
439                        ProcessCapability::Poor => "Poor",
440                        ProcessCapability::Inadequate => "Inadequate",
441                    };
442                    println!("Process Capability: {cap_text}");
443                }
444            }
445
446            if let Some(within_spec) = result.within_spec_percent {
447                println!("Within Specification: {within_spec:.1}%");
448            }
449        }
450        "json" => {
451            use serde_json::json;
452            let output = json!({
453                "mean": result.mean,
454                "std_dev": result.std_dev,
455                "cp": result.cp,
456                "cpk": result.cpk,
457                "within_spec_percent": result.within_spec_percent,
458                "three_sigma_limits": result.three_sigma_limits,
459                "violations_count": result.control_chart_violations.len()
460            });
461            println!("{}", serde_json::to_string_pretty(&output).unwrap());
462        }
463        _ => println!("Unsupported format for quality control"),
464    }
465}
466
467fn print_text_output(result: &NormalResult, quiet: bool, verbose: bool) {
468    if quiet {
469        println!("mean: {:.3}", result.mean);
470        println!("std_dev: {:.3}", result.std_dev);
471        println!("normality_score: {:.3}", result.normality_score);
472        return;
473    }
474
475    println!("Normal Distribution Analysis Results");
476    println!();
477    println!("Dataset: {}", result.dataset_name);
478    println!("Numbers analyzed: {}", result.numbers_analyzed);
479    println!("Quality Level: {:?}", result.risk_level);
480
481    println!();
482    println!("Distribution Histogram:");
483    println!("{}", format_normal_histogram(result));
484
485    println!();
486    println!("Distribution Parameters:");
487    println!("  Mean: {:.3}", result.mean);
488    println!("  Standard Deviation: {:.3}", result.std_dev);
489    println!("  Variance: {:.3}", result.variance);
490    println!("  Skewness: {:.3}", result.skewness);
491    println!("  Kurtosis: {:.3}", result.kurtosis);
492
493    if verbose {
494        println!();
495        println!("Normality Tests:");
496        println!(
497            "  Shapiro-Wilk: W={:.3}, p={:.3}",
498            result.shapiro_wilk_statistic, result.shapiro_wilk_p_value
499        );
500        println!(
501            "  Anderson-Darling: A²={:.3}, p={:.3}",
502            result.anderson_darling_statistic, result.anderson_darling_p_value
503        );
504        println!(
505            "  Kolmogorov-Smirnov: D={:.3}, p={:.3}",
506            result.kolmogorov_smirnov_statistic, result.kolmogorov_smirnov_p_value
507        );
508
509        println!();
510        println!("Quality Metrics:");
511        println!("  Normality Score: {:.3}", result.normality_score);
512        println!("  QQ Correlation: {:.3}", result.qq_correlation);
513        println!("  Distribution Quality: {:.3}", result.distribution_quality);
514
515        if !result.outliers_z_score.is_empty() {
516            println!();
517            println!("Outlier Detection:");
518            println!("  Z-score: {} outliers", result.outliers_z_score.len());
519            println!(
520                "  Modified Z-score: {} outliers",
521                result.outliers_modified_z.len()
522            );
523            println!("  IQR method: {} outliers", result.outliers_iqr.len());
524        }
525
526        println!();
527        println!("Sigma Coverage:");
528        println!("  1σ: {:.1}%", result.within_1_sigma_percent);
529        println!("  2σ: {:.1}%", result.within_2_sigma_percent);
530        println!("  3σ: {:.1}%", result.within_3_sigma_percent);
531
532        println!();
533        println!("Interpretation:");
534        print_normal_interpretation(result);
535    }
536}
537
538fn print_normal_interpretation(result: &NormalResult) {
539    use lawkit_core::common::risk::RiskLevel;
540
541    match result.risk_level {
542        RiskLevel::Low => {
543            println!(
544                "{}",
545                colors::pass("[PASS] Data follows normal distribution well")
546            );
547            println!("   Suitable for standard statistical analysis");
548        }
549        RiskLevel::Medium => {
550            println!(
551                "{}",
552                colors::warn("[WARN] Data shows some deviation from normality")
553            );
554            println!("   Consider robust statistical methods");
555        }
556        RiskLevel::High => {
557            println!(
558                "{}",
559                colors::fail("[FAIL] Data significantly deviates from normality")
560            );
561            println!("   Non-parametric methods recommended");
562        }
563        RiskLevel::Critical => {
564            println!(
565                "{}",
566                colors::critical("[CRITICAL] Data shows extreme deviation from normality")
567            );
568            println!("   Requires special handling and investigation");
569        }
570    }
571
572    // 歪度・尖度に基づく解釈
573    if result.skewness.abs() > 1.0 {
574        if result.skewness > 0.0 {
575            println!(
576                "   {}",
577                colors::info("INFO: Data is right-skewed (positive skewness)")
578            );
579        } else {
580            println!(
581                "   {}",
582                colors::info("INFO: Data is left-skewed (negative skewness)")
583            );
584        }
585    }
586
587    if result.kurtosis > 1.0 {
588        println!(
589            "   {}",
590            colors::info("INFO: Data has heavy tails (high kurtosis)")
591        );
592    } else if result.kurtosis < -1.0 {
593        println!(
594            "   {}",
595            colors::info("INFO: Data has light tails (low kurtosis)")
596        );
597    }
598
599    // 異常値の解釈
600    if !result.outliers_z_score.is_empty() {
601        println!(
602            "   {}",
603            colors::alert(&format!(
604                "ALERT: Outliers detected: {}",
605                result.outliers_z_score.len()
606            ))
607        );
608    }
609}
610
611fn print_json_output(result: &NormalResult) {
612    use serde_json::json;
613
614    let output = json!({
615        "dataset": result.dataset_name,
616        "numbers_analyzed": result.numbers_analyzed,
617        "risk_level": format!("{:?}", result.risk_level),
618        "mean": result.mean,
619        "std_dev": result.std_dev,
620        "variance": result.variance,
621        "skewness": result.skewness,
622        "kurtosis": result.kurtosis,
623        "shapiro_wilk": {
624            "statistic": result.shapiro_wilk_statistic,
625            "p_value": result.shapiro_wilk_p_value
626        },
627        "anderson_darling": {
628            "statistic": result.anderson_darling_statistic,
629            "p_value": result.anderson_darling_p_value
630        },
631        "kolmogorov_smirnov": {
632            "statistic": result.kolmogorov_smirnov_statistic,
633            "p_value": result.kolmogorov_smirnov_p_value
634        },
635        "normality_score": result.normality_score,
636        "qq_correlation": result.qq_correlation,
637        "distribution_quality": result.distribution_quality,
638        "outliers": {
639            "z_score_count": result.outliers_z_score.len(),
640            "modified_z_count": result.outliers_modified_z.len(),
641            "iqr_count": result.outliers_iqr.len()
642        },
643        "confidence_intervals": {
644            "mean_95": result.mean_confidence_interval,
645            "prediction_95": result.prediction_interval_95,
646            "three_sigma": result.three_sigma_limits
647        },
648        "sigma_coverage": {
649            "within_1_sigma": result.within_1_sigma_percent,
650            "within_2_sigma": result.within_2_sigma_percent,
651            "within_3_sigma": result.within_3_sigma_percent
652        }
653    });
654
655    println!("{}", serde_json::to_string_pretty(&output).unwrap());
656}
657
658fn print_csv_output(result: &NormalResult) {
659    println!("dataset,numbers_analyzed,risk_level,mean,std_dev,variance,skewness,kurtosis,normality_score");
660    println!(
661        "{},{},{:?},{:.3},{:.3},{:.3},{:.3},{:.3},{:.3}",
662        result.dataset_name,
663        result.numbers_analyzed,
664        result.risk_level,
665        result.mean,
666        result.std_dev,
667        result.variance,
668        result.skewness,
669        result.kurtosis,
670        result.normality_score
671    );
672}
673
674fn print_yaml_output(result: &NormalResult) {
675    println!("dataset: \"{}\"", result.dataset_name);
676    println!("numbers_analyzed: {}", result.numbers_analyzed);
677    println!("risk_level: \"{:?}\"", result.risk_level);
678    println!("mean: {:.3}", result.mean);
679    println!("std_dev: {:.3}", result.std_dev);
680    println!("variance: {:.3}", result.variance);
681    println!("skewness: {:.3}", result.skewness);
682    println!("kurtosis: {:.3}", result.kurtosis);
683    println!("normality_score: {:.3}", result.normality_score);
684}
685
686fn print_toml_output(result: &NormalResult) {
687    println!("dataset = \"{}\"", result.dataset_name);
688    println!("numbers_analyzed = {}", result.numbers_analyzed);
689    println!("risk_level = \"{:?}\"", result.risk_level);
690    println!("mean = {:.3}", result.mean);
691    println!("std_dev = {:.3}", result.std_dev);
692    println!("variance = {:.3}", result.variance);
693    println!("skewness = {:.3}", result.skewness);
694    println!("kurtosis = {:.3}", result.kurtosis);
695    println!("normality_score = {:.3}", result.normality_score);
696}
697
698fn print_xml_output(result: &NormalResult) {
699    println!("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
700    println!("<normal_analysis>");
701    println!("  <dataset>{}</dataset>", result.dataset_name);
702    println!(
703        "  <numbers_analyzed>{}</numbers_analyzed>",
704        result.numbers_analyzed
705    );
706    println!("  <risk_level>{:?}</risk_level>", result.risk_level);
707    println!("  <mean>{:.3}</mean>", result.mean);
708    println!("  <std_dev>{:.3}</std_dev>", result.std_dev);
709    println!("  <variance>{:.3}</variance>", result.variance);
710    println!("  <skewness>{:.3}</skewness>", result.skewness);
711    println!("  <kurtosis>{:.3}</kurtosis>", result.kurtosis);
712    println!(
713        "  <normality_score>{:.3}</normality_score>",
714        result.normality_score
715    );
716    println!("</normal_analysis>");
717}
718
719/// Analyze numbers with filtering and custom options
720fn analyze_numbers_with_options(
721    matches: &clap::ArgMatches,
722    dataset_name: String,
723    numbers: &[f64],
724) -> Result<NormalResult> {
725    // Apply number filtering if specified
726    let filtered_numbers = if let Some(filter_str) = matches.get_one::<String>("filter") {
727        let filter = NumberFilter::parse(filter_str)
728            .map_err(|e| BenfError::ParseError(format!("無効なフィルタ: {e}")))?;
729
730        let filtered = apply_number_filter(numbers, &filter);
731
732        // Inform user about filtering results
733        if filtered.len() != numbers.len() {
734            eprintln!(
735                "フィルタリング結果: {} 個の数値が {} 個に絞り込まれました ({})",
736                numbers.len(),
737                filtered.len(),
738                filter.description()
739            );
740        }
741
742        filtered
743    } else {
744        numbers.to_vec()
745    };
746
747    // Parse minimum count requirement
748    let min_count = if let Some(min_count_str) = matches.get_one::<String>("min-count") {
749        min_count_str
750            .parse::<usize>()
751            .map_err(|_| BenfError::ParseError("無効な最小数値数".to_string()))?
752    } else {
753        8 // 正規分布分析では最低8個必要
754    };
755
756    // Check minimum count requirement
757    if filtered_numbers.len() < min_count {
758        return Err(BenfError::InsufficientData(filtered_numbers.len()));
759    }
760
761    // Perform normal distribution analysis
762    analyze_normal_distribution(&filtered_numbers, &dataset_name)
763}
764
765/// 高度な異常値検出結果の出力
766fn output_advanced_outlier_result(_matches: &ArgMatches, result: &AdvancedOutlierResult) {
767    println!("Advanced Outlier Detection Result: {}", result.method_name);
768    println!("Detection rate: {:.3}", result.detection_rate);
769    println!("Threshold: {:.3}", result.threshold);
770    println!("Outliers found: {}", result.outliers.len());
771
772    if !result.outliers.is_empty() {
773        println!("\nOutlier Details:");
774        for outlier in &result.outliers {
775            println!(
776                "  Index {}: Value={:.3}, Score={:.3}, Confidence={:.3}",
777                outlier.index, outlier.value, outlier.outlier_score, outlier.confidence
778            );
779        }
780    }
781
782    if !result.method_params.is_empty() {
783        println!("\nMethod Parameters:");
784        for (param, value) in &result.method_params {
785            println!("  {param}: {value:.3}");
786        }
787    }
788}
789
/// Helper that computes the population standard deviation (divides by `n`,
/// not `n - 1`). Returns 0.0 for an empty slice.
fn calculate_std_dev(numbers: &[f64]) -> f64 {
    let count = numbers.len();
    if count == 0 {
        return 0.0;
    }

    let mean = numbers.iter().sum::<f64>() / count as f64;
    let squared_deviations = numbers.iter().map(|value| (value - mean).powi(2));
    let variance = squared_deviations.sum::<f64>() / count as f64;

    variance.sqrt()
}
800
801/// 時系列分析結果の出力
802fn output_timeseries_result(_matches: &ArgMatches, result: &TimeSeriesAnalysis) {
803    println!("Time Series Analysis Results");
804    println!("============================");
805
806    // トレンド分析
807    println!("\nTrend Analysis:");
808    println!("  Slope: {:.6}", result.trend.slope);
809    println!("  R-squared: {:.3}", result.trend.r_squared);
810    println!("  Direction: {:?}", result.trend.direction);
811    println!("  Trend strength: {:.3}", result.trend.trend_strength);
812
813    // 季節性
814    if result.seasonality.detected {
815        println!("\nSeasonality Detected:");
816        if let Some(period) = result.seasonality.period {
817            println!("  Period: {period:.1}");
818        }
819        println!("  Strength: {:.3}", result.seasonality.strength);
820    } else {
821        println!("\nNo significant seasonality detected");
822    }
823
824    // 変化点
825    if !result.changepoints.is_empty() {
826        println!("\nChange Points Detected: {}", result.changepoints.len());
827        for (i, cp) in result.changepoints.iter().enumerate().take(5) {
828            println!(
829                "  {}: Index {}, Significance: {:.2}, Type: {:?}",
830                i + 1,
831                cp.index,
832                cp.significance,
833                cp.change_type
834            );
835        }
836    }
837
838    // 予測
839    if !result.forecasts.is_empty() {
840        println!("\nForecasts (next {} points):", result.forecasts.len());
841        for (i, forecast) in result.forecasts.iter().enumerate() {
842            println!(
843                "  {}: {:.3} (uncertainty: {:.3})",
844                i + 1,
845                forecast.predicted_value,
846                forecast.uncertainty
847            );
848        }
849    }
850
851    // 異常値
852    if !result.anomalies.is_empty() {
853        println!("\nAnomalies Detected: {}", result.anomalies.len());
854        for anomaly in result.anomalies.iter().take(10) {
855            println!(
856                "  Index {}: Value={:.3}, Expected={:.3}, Score={:.3}",
857                anomaly.index, anomaly.value, anomaly.expected_value, anomaly.anomaly_score
858            );
859        }
860    }
861
862    // データ品質
863    println!("\nData Quality Assessment:");
864    println!(
865        "  Completeness: {:.1}%",
866        result.statistics.data_quality.completeness * 100.0
867    );
868    println!(
869        "  Consistency: {:.1}%",
870        result.statistics.data_quality.consistency * 100.0
871    );
872    println!(
873        "  Outlier ratio: {:.1}%",
874        result.statistics.data_quality.outlier_ratio * 100.0
875    );
876    println!("  Noise level: {:.3}", result.statistics.noise_level);
877}
878
879fn format_normal_histogram(result: &NormalResult) -> String {
880    let mut output = String::new();
881    const CHART_WIDTH: usize = 50;
882    const BINS: usize = 10;
883
884    // 仮想データでヒストグラムをシミュレート（実際のデータはNormalResultから取得不可）
885    // 平均、標準偏差を使って理論的な正規分布カーブを表示
886    let mean = result.mean;
887    let std_dev = result.std_dev;
888
889    // -3σから+3σの範囲でビンを作成
890    let range_start = mean - 3.0 * std_dev;
891    let range_end = mean + 3.0 * std_dev;
892    let bin_width = (range_end - range_start) / BINS as f64;
893
894    // 各ビンの理論的確率密度を計算
895    let mut bin_densities = Vec::new();
896    let mut max_density: f64 = 0.0;
897
898    for i in 0..BINS {
899        let bin_center = range_start + (i as f64 + 0.5) * bin_width;
900        let z_score = (bin_center - mean) / std_dev;
901
902        // 正規分布の確率密度関数
903        let density =
904            (-0.5 * z_score * z_score).exp() / (std_dev * (2.0 * std::f64::consts::PI).sqrt());
905        bin_densities.push(density);
906        max_density = max_density.max(density);
907    }
908
909    // ヒストグラムを表示
910    for (i, &density) in bin_densities.iter().enumerate() {
911        let bin_start = range_start + i as f64 * bin_width;
912        let bin_end = bin_start + bin_width;
913
914        let normalized_density = if max_density > 0.0 {
915            density / max_density
916        } else {
917            0.0
918        };
919        let bar_length = (normalized_density * CHART_WIDTH as f64).round() as usize;
920        let bar_length = bar_length.min(CHART_WIDTH);
921
922        // Calculate theoretical expected density for this bin
923        let theoretical_density = density / max_density; // This is already the normalized theoretical density
924        let expected_line_pos = (theoretical_density * CHART_WIDTH as f64).round() as usize;
925        let expected_line_pos = expected_line_pos.min(CHART_WIDTH - 1);
926
927        // Create bar with filled portion, expected value line, and background
928        let mut bar_chars = Vec::new();
929        for pos in 0..CHART_WIDTH {
930            if pos == expected_line_pos {
931                bar_chars.push('┃'); // Expected value line (theoretical density)
932            } else if pos < bar_length {
933                bar_chars.push('█'); // Filled portion
934            } else {
935                bar_chars.push('░'); // Background portion
936            }
937        }
938        let full_bar: String = bar_chars.iter().collect();
939
940        output.push_str(&format!(
941            "{:6.2}-{:6.2}: {} {:>5.1}%\n",
942            bin_start,
943            bin_end,
944            full_bar,
945            normalized_density * 100.0
946        ));
947    }
948
949    // 統計情報を追加
950    output.push_str(&format!(
951        "\nDistribution: μ={mean:.2}, σ={std_dev:.2}, Range: [{range_start:.2}, {range_end:.2}]"
952    ));
953
954    // σ範囲の情報
955    output.push_str(&format!(
956        "\n1σ: {:.1}%, 2σ: {:.1}%, 3σ: {:.1}%",
957        result.within_1_sigma_percent, result.within_2_sigma_percent, result.within_3_sigma_percent
958    ));
959
960    output
961}
// lawkit_python/subcommands/normal.rs