lawkit_python/subcommands/
normal.rs

1use crate::colors;
2use crate::common_options::{get_optimized_reader, setup_automatic_optimization_config};
3use clap::ArgMatches;
4use lawkit_core::{
5    common::{
6        filtering::{apply_number_filter, NumberFilter},
7        input::{parse_input_auto, parse_text_input},
8        memory::{streaming_normal_analysis, MemoryConfig},
9        outliers::{
10            detect_outliers_dbscan, detect_outliers_ensemble, detect_outliers_isolation,
11            detect_outliers_lof, AdvancedOutlierResult,
12        },
13        streaming_io::OptimizedFileReader,
14        timeseries::{analyze_timeseries, create_timeseries_from_values, TimeSeriesAnalysis},
15    },
16    error::{BenfError, Result},
17    laws::normal::{
18        analyze_normal_distribution, detect_outliers, quality_control_analysis, test_normality,
19        NormalResult, NormalityTest, NormalityTestResult, OutlierDetectionMethod,
20        OutlierDetectionResult, ProcessCapability, QualityControlResult,
21    },
22};
23
24pub fn run(matches: &ArgMatches) -> Result<()> {
25    // 自動最適化設定をセットアップ
26    let (_parallel_config, _memory_config) = setup_automatic_optimization_config();
27
28    // 特殊モードの確認（フラグベースのモードを優先）
29    if matches.get_flag("outliers") {
30        return run_outlier_detection_mode(matches);
31    }
32
33    if matches.get_flag("quality-control") {
34        return run_quality_control_mode(matches);
35    }
36
37    if matches.get_flag("enable-timeseries") {
38        return run_timeseries_analysis_mode(matches);
39    }
40
41    // testパラメータが明示的に指定されている場合のみテストモード
42    if let Some(test_type) = matches.get_one::<String>("test") {
43        if test_type != "all" {
44            // "all"はデフォルトなので通常分析モードで処理
45            return run_normality_test_mode(matches, test_type);
46        }
47    }
48
49    // Determine input source based on arguments
50    if matches.get_flag("verbose") {
51        eprintln!(
52            "Debug: input argument = {:?}",
53            matches.get_one::<String>("input")
54        );
55    }
56
57    // 入力データ処理
58    let numbers = if let Some(input) = matches.get_one::<String>("input") {
59        // ファイル入力の場合
60        match parse_input_auto(input) {
61            Ok(numbers) => {
62                if numbers.is_empty() {
63                    eprintln!("Error: No valid numbers found in input");
64                    std::process::exit(1);
65                }
66                numbers
67            }
68            Err(e) => {
69                eprintln!("Error processing input '{input}': {e}");
70                std::process::exit(1);
71            }
72        }
73    } else {
74        // stdin入力の場合：ストリーミング処理を使用
75        if matches.get_flag("verbose") {
76            eprintln!("Debug: Reading from stdin, using automatic optimization");
77        }
78
79        let mut reader = OptimizedFileReader::from_stdin();
80
81        if matches.get_flag("verbose") {
82            eprintln!(
83                "Debug: Using automatic optimization (streaming + incremental + memory efficiency)"
84            );
85        }
86
87        let numbers = match reader
88            .read_lines_streaming(|line: String| parse_text_input(&line).map(Some).or(Ok(None)))
89        {
90            Ok(nested_numbers) => {
91                let flattened: Vec<f64> = nested_numbers.into_iter().flatten().collect();
92                if matches.get_flag("verbose") {
93                    eprintln!("Debug: Collected {} numbers from stream", flattened.len());
94                }
95                flattened
96            }
97            Err(e) => {
98                eprintln!("Analysis error: {e}");
99                std::process::exit(1);
100            }
101        };
102
103        if numbers.is_empty() {
104            eprintln!("Error: No valid numbers found in input");
105            std::process::exit(1);
106        }
107
108        // インクリメンタルストリーミング分析を実行（より詳細な統計が必要な場合）
109        if numbers.len() > 10000 {
110            let memory_config = MemoryConfig::default();
111            let chunk_result = match streaming_normal_analysis(numbers.into_iter(), &memory_config)
112            {
113                Ok(result) => {
114                    if matches.get_flag("verbose") {
115                        eprintln!(
116                            "Debug: Streaming analysis successful - {} items processed",
117                            result.total_items
118                        );
119                    }
120                    result
121                }
122                Err(e) => {
123                    eprintln!("Streaming analysis error: {e}");
124                    std::process::exit(1);
125                }
126            };
127
128            if matches.get_flag("verbose") {
129                eprintln!(
130                    "Debug: Processed {} numbers in {} chunks",
131                    chunk_result.total_items, chunk_result.chunks_processed
132                );
133                eprintln!("Debug: Memory used: {:.2} MB", chunk_result.memory_used_mb);
134            }
135
136            chunk_result.result.values().to_vec()
137        } else {
138            if matches.get_flag("verbose") {
139                eprintln!("Debug: Memory used: 0.00 MB");
140            }
141            numbers
142        }
143    };
144
145    let dataset_name = matches
146        .get_one::<String>("input")
147        .map(|s| s.to_string())
148        .unwrap_or_else(|| "stdin".to_string());
149
150    let result = match analyze_numbers_with_options(matches, dataset_name, &numbers) {
151        Ok(result) => result,
152        Err(e) => {
153            eprintln!("Analysis error: {e}");
154            std::process::exit(1);
155        }
156    };
157
158    output_results(matches, &result);
159    std::process::exit(result.risk_level.exit_code())
160}
161
162fn run_normality_test_mode(matches: &ArgMatches, test_type: &str) -> Result<()> {
163    let numbers = get_numbers_from_input(matches)?;
164
165    let test = match test_type {
166        "shapiro" => NormalityTest::ShapiroWilk,
167        "anderson" => NormalityTest::AndersonDarling,
168        "ks" => NormalityTest::KolmogorovSmirnov,
169        "all" => NormalityTest::All,
170        _ => {
171            eprintln!(
172                "Error: Unknown test type '{test_type}'. Available: shapiro, anderson, ks, all"
173            );
174            std::process::exit(2);
175        }
176    };
177
178    let test_result = test_normality(&numbers, test)?;
179    output_normality_test_result(matches, &test_result);
180
181    // Exit code: 0 = normal distribution, 10 = non-normal (HIGH risk indication)
182    let exit_code = if test_result.is_normal { 0 } else { 10 };
183    std::process::exit(exit_code);
184}
185
186fn run_outlier_detection_mode(matches: &ArgMatches) -> Result<()> {
187    let numbers = get_numbers_from_input(matches)?;
188
189    let method_str = matches
190        .get_one::<String>("outlier-method")
191        .map(|s| s.as_str())
192        .unwrap_or("zscore");
193
194    // 高度な異常値検出手法の処理
195    match method_str {
196        "lof" => {
197            let result = detect_outliers_lof(&numbers, 5)?;
198            output_advanced_outlier_result(matches, &result);
199            let exit_code = if result.outliers.is_empty() { 0 } else { 10 };
200            std::process::exit(exit_code);
201        }
202        "isolation" => {
203            let result = detect_outliers_isolation(&numbers, 8)?;
204            output_advanced_outlier_result(matches, &result);
205            let exit_code = if result.outliers.is_empty() { 0 } else { 10 };
206            std::process::exit(exit_code);
207        }
208        "dbscan" => {
209            let std_dev = calculate_std_dev(&numbers);
210            let eps = std_dev * 0.5;
211            let min_pts = (numbers.len() as f64).sqrt() as usize;
212            let result = detect_outliers_dbscan(&numbers, eps, min_pts)?;
213            output_advanced_outlier_result(matches, &result);
214            let exit_code = if result.outliers.is_empty() { 0 } else { 10 };
215            std::process::exit(exit_code);
216        }
217        "ensemble" => {
218            let result = detect_outliers_ensemble(&numbers)?;
219            output_advanced_outlier_result(matches, &result);
220            let exit_code = if result.outliers.is_empty() { 0 } else { 10 };
221            std::process::exit(exit_code);
222        }
223        _ => {
224            // 既存の異常値検出手法
225            let method = match method_str {
226                "zscore" => OutlierDetectionMethod::ZScore,
227                "modified" | "modified_zscore" => OutlierDetectionMethod::ModifiedZScore,
228                "iqr" => OutlierDetectionMethod::IQR,
229                _ => {
230                    eprintln!(
231                        "Error: Unknown outlier detection method '{method_str}'. Available: zscore, modified_zscore, iqr, lof, isolation, dbscan, ensemble"
232                    );
233                    std::process::exit(2);
234                }
235            };
236
237            let outlier_result = detect_outliers(&numbers, method)?;
238            output_outlier_detection_result(matches, &outlier_result);
239
240            // Exit code: 0 = no outliers, 10 = outliers found (HIGH risk indication)
241            let exit_code = if outlier_result.outliers.is_empty() {
242                0
243            } else {
244                10
245            };
246            std::process::exit(exit_code);
247        }
248    }
249}
250
251fn run_timeseries_analysis_mode(matches: &ArgMatches) -> Result<()> {
252    let numbers = get_numbers_from_input(matches)?;
253
254    // 数値データを時系列データに変換
255    let timeseries_data = create_timeseries_from_values(&numbers);
256
257    // 時系列分析を実行
258    let analysis_result = analyze_timeseries(&timeseries_data)?;
259
260    // 結果を出力
261    output_timeseries_result(matches, &analysis_result);
262
263    std::process::exit(0);
264}
265
266fn run_quality_control_mode(matches: &ArgMatches) -> Result<()> {
267    let numbers = get_numbers_from_input(matches)?;
268
269    let spec_limits = if let Some(limits_str) = matches.get_one::<String>("spec-limits") {
270        parse_spec_limits(limits_str)?
271    } else {
272        None
273    };
274
275    let qc_result = quality_control_analysis(&numbers, spec_limits)?;
276    output_quality_control_result(matches, &qc_result);
277
278    let exit_code = match &qc_result.process_capability {
279        Some(cap) => match cap {
280            ProcessCapability::Excellent => 0,
281            ProcessCapability::Adequate => 1,
282            ProcessCapability::Poor => 2,
283            ProcessCapability::Inadequate => 3,
284        },
285        None => 0,
286    };
287    std::process::exit(exit_code);
288}
289
290fn get_numbers_from_input(matches: &ArgMatches) -> Result<Vec<f64>> {
291    let (_parallel_config, _memory_config) = setup_automatic_optimization_config();
292
293    let buffer = if let Some(input) = matches.get_one::<String>("input") {
294        if input == "-" {
295            get_optimized_reader(None)
296        } else {
297            get_optimized_reader(Some(input))
298        }
299    } else {
300        get_optimized_reader(None)
301    };
302
303    let data = buffer.map_err(|e| BenfError::ParseError(e.to_string()))?;
304    parse_text_input(&data)
305}
306
307fn parse_spec_limits(limits_str: &str) -> Result<Option<(f64, f64)>> {
308    let parts: Vec<&str> = limits_str.split(',').collect();
309    if parts.len() != 2 {
310        return Err(BenfError::ParseError(
311            "Spec limits must be in format 'lower,upper'".to_string(),
312        ));
313    }
314
315    let lower = parts[0]
316        .trim()
317        .parse::<f64>()
318        .map_err(|_| BenfError::ParseError("Invalid lower spec limit".to_string()))?;
319    let upper = parts[1]
320        .trim()
321        .parse::<f64>()
322        .map_err(|_| BenfError::ParseError("Invalid upper spec limit".to_string()))?;
323
324    if lower >= upper {
325        return Err(BenfError::ParseError(
326            "Lower spec limit must be less than upper spec limit".to_string(),
327        ));
328    }
329
330    Ok(Some((lower, upper)))
331}
332
333fn output_results(matches: &clap::ArgMatches, result: &NormalResult) {
334    let format = matches.get_one::<String>("format").unwrap();
335    let quiet = matches.get_flag("quiet");
336    let verbose = matches.get_flag("verbose");
337    let no_color = matches.get_flag("no-color");
338
339    match format.as_str() {
340        "text" => print_text_output(result, quiet, verbose, no_color),
341        "json" => print_json_output(result),
342        "csv" => print_csv_output(result),
343        "yaml" => print_yaml_output(result),
344        "toml" => print_toml_output(result),
345        "xml" => print_xml_output(result),
346        _ => {
347            eprintln!("Error: Unsupported output format: {format}");
348            std::process::exit(2);
349        }
350    }
351}
352
353fn output_normality_test_result(matches: &clap::ArgMatches, result: &NormalityTestResult) {
354    let format_str = matches
355        .get_one::<String>("format")
356        .map(|s| s.as_str())
357        .unwrap_or("text");
358
359    match format_str {
360        "text" => {
361            println!("Test: {}", result.test_name);
362            println!("Statistic: {:.6}", result.statistic);
363            println!("P-value: {:.6}", result.p_value);
364            println!("Is Normal: {}", if result.is_normal { "Yes" } else { "No" });
365        }
366        "json" => {
367            use serde_json::json;
368            let output = json!({
369                "test_name": result.test_name,
370                "statistic": result.statistic,
371                "p_value": result.p_value,
372                "critical_value": result.critical_value,
373                "is_normal": result.is_normal
374            });
375            println!("{}", serde_json::to_string_pretty(&output).unwrap());
376        }
377        _ => print_text_output(
378            &NormalResult::new("test".to_string(), &[0.0; 10]).unwrap(),
379            false,
380            false,
381            false,
382        ),
383    }
384}
385
386fn output_outlier_detection_result(matches: &clap::ArgMatches, result: &OutlierDetectionResult) {
387    let format_str = matches
388        .get_one::<String>("format")
389        .map(|s| s.as_str())
390        .unwrap_or("text");
391
392    match format_str {
393        "text" => {
394            println!("Method: {}", result.method_name);
395            println!("Outliers found: {}", result.outliers.len());
396
397            if !result.outliers.is_empty() {
398                println!("\nOutlier Details:");
399                for outlier in &result.outliers {
400                    println!("  Index: {} (Value: {:.3})", outlier.index, outlier.value);
401                }
402            }
403        }
404        "json" => {
405            use serde_json::json;
406            let output = json!({
407                "method_name": result.method_name,
408                "threshold": result.threshold,
409                "outliers_count": result.outliers.len(),
410                "outliers": result.outliers.iter().map(|o| json!({
411                    "index": o.index,
412                    "value": o.value,
413                    "score": o.score,
414                    "is_outlier": o.is_outlier
415                })).collect::<Vec<_>>()
416            });
417            println!("{}", serde_json::to_string_pretty(&output).unwrap());
418        }
419        _ => println!("Unsupported format for outlier detection"),
420    }
421}
422
423fn output_quality_control_result(matches: &clap::ArgMatches, result: &QualityControlResult) {
424    let format_str = matches
425        .get_one::<String>("format")
426        .map(|s| s.as_str())
427        .unwrap_or("text");
428
429    match format_str {
430        "text" => {
431            println!("Quality Control Analysis");
432            println!("Mean: {:.3}", result.mean);
433            println!("Standard Deviation: {:.3}", result.std_dev);
434
435            if let (Some(cp), Some(cpk)) = (result.cp, result.cpk) {
436                println!("Cp: {cp:.3}");
437                println!("Cpk: {cpk:.3}");
438
439                if let Some(ref capability) = result.process_capability {
440                    let cap_text = match capability {
441                        ProcessCapability::Excellent => "Excellent",
442                        ProcessCapability::Adequate => "Adequate",
443                        ProcessCapability::Poor => "Poor",
444                        ProcessCapability::Inadequate => "Inadequate",
445                    };
446                    println!("Process Capability: {cap_text}");
447                }
448            }
449
450            if let Some(within_spec) = result.within_spec_percent {
451                println!("Within Specification: {within_spec:.1}%");
452            }
453        }
454        "json" => {
455            use serde_json::json;
456            let output = json!({
457                "mean": result.mean,
458                "std_dev": result.std_dev,
459                "cp": result.cp,
460                "cpk": result.cpk,
461                "within_spec_percent": result.within_spec_percent,
462                "three_sigma_limits": result.three_sigma_limits,
463                "violations_count": result.control_chart_violations.len()
464            });
465            println!("{}", serde_json::to_string_pretty(&output).unwrap());
466        }
467        _ => println!("Unsupported format for quality control"),
468    }
469}
470
471fn print_text_output(result: &NormalResult, quiet: bool, verbose: bool, no_color: bool) {
472    if quiet {
473        println!("mean: {:.3}", result.mean);
474        println!("std_dev: {:.3}", result.std_dev);
475        println!("normality_score: {:.3}", result.normality_score);
476        return;
477    }
478
479    println!("Normal Distribution Analysis Results");
480    println!();
481    println!("Dataset: {}", result.dataset_name);
482    println!("Numbers analyzed: {}", result.numbers_analyzed);
483    println!("Quality Level: {:?}", result.risk_level);
484
485    println!();
486    println!("Distribution Histogram:");
487    println!("{}", format_normal_histogram(result));
488
489    println!();
490    println!("Distribution Parameters:");
491    println!("  Mean: {:.3}", result.mean);
492    println!("  Standard Deviation: {:.3}", result.std_dev);
493    println!("  Variance: {:.3}", result.variance);
494    println!("  Skewness: {:.3}", result.skewness);
495    println!("  Kurtosis: {:.3}", result.kurtosis);
496
497    if verbose {
498        println!();
499        println!("Normality Tests:");
500        println!(
501            "  Shapiro-Wilk: W={:.3}, p={:.3}",
502            result.shapiro_wilk_statistic, result.shapiro_wilk_p_value
503        );
504        println!(
505            "  Anderson-Darling: A²={:.3}, p={:.3}",
506            result.anderson_darling_statistic, result.anderson_darling_p_value
507        );
508        println!(
509            "  Kolmogorov-Smirnov: D={:.3}, p={:.3}",
510            result.kolmogorov_smirnov_statistic, result.kolmogorov_smirnov_p_value
511        );
512
513        println!();
514        println!("Quality Metrics:");
515        println!("  Normality Score: {:.3}", result.normality_score);
516        println!("  QQ Correlation: {:.3}", result.qq_correlation);
517        println!("  Distribution Quality: {:.3}", result.distribution_quality);
518
519        if !result.outliers_z_score.is_empty() {
520            println!();
521            println!("Outlier Detection:");
522            println!("  Z-score: {} outliers", result.outliers_z_score.len());
523            println!(
524                "  Modified Z-score: {} outliers",
525                result.outliers_modified_z.len()
526            );
527            println!("  IQR method: {} outliers", result.outliers_iqr.len());
528        }
529
530        println!();
531        println!("Sigma Coverage:");
532        println!("  1σ: {:.1}%", result.within_1_sigma_percent);
533        println!("  2σ: {:.1}%", result.within_2_sigma_percent);
534        println!("  3σ: {:.1}%", result.within_3_sigma_percent);
535
536        println!();
537        println!("Interpretation:");
538        print_normal_interpretation(result, no_color);
539    }
540}
541
542fn print_normal_interpretation(result: &NormalResult, no_color: bool) {
543    use lawkit_core::common::risk::RiskLevel;
544
545    match result.risk_level {
546        RiskLevel::Low => {
547            println!(
548                "{}",
549                colors::pass("[PASS] Data follows normal distribution well", no_color)
550            );
551            println!("   Suitable for standard statistical analysis");
552        }
553        RiskLevel::Medium => {
554            println!(
555                "{}",
556                colors::warn("[WARN] Data shows some deviation from normality", no_color)
557            );
558            println!("   Consider robust statistical methods");
559        }
560        RiskLevel::High => {
561            println!(
562                "{}",
563                colors::fail(
564                    "[FAIL] Data significantly deviates from normality",
565                    no_color
566                )
567            );
568            println!("   Non-parametric methods recommended");
569        }
570        RiskLevel::Critical => {
571            println!(
572                "{}",
573                colors::critical(
574                    "[CRITICAL] Data shows extreme deviation from normality",
575                    no_color
576                )
577            );
578            println!("   Requires special handling and investigation");
579        }
580    }
581
582    // 歪度・尖度に基づく解釈
583    if result.skewness.abs() > 1.0 {
584        if result.skewness > 0.0 {
585            println!(
586                "   {}",
587                colors::info("INFO: Data is right-skewed (positive skewness)", no_color)
588            );
589        } else {
590            println!(
591                "   {}",
592                colors::info("INFO: Data is left-skewed (negative skewness)", no_color)
593            );
594        }
595    }
596
597    if result.kurtosis > 1.0 {
598        println!(
599            "   {}",
600            colors::info("INFO: Data has heavy tails (high kurtosis)", no_color)
601        );
602    } else if result.kurtosis < -1.0 {
603        println!(
604            "   {}",
605            colors::info("INFO: Data has light tails (low kurtosis)", no_color)
606        );
607    }
608
609    // 異常値の解釈
610    if !result.outliers_z_score.is_empty() {
611        println!(
612            "   {}",
613            colors::alert(
614                &format!(
615                    "ALERT: Outliers detected: {}",
616                    result.outliers_z_score.len()
617                ),
618                no_color
619            )
620        );
621    }
622}
623
624fn print_json_output(result: &NormalResult) {
625    use serde_json::json;
626
627    let output = json!({
628        "dataset": result.dataset_name,
629        "numbers_analyzed": result.numbers_analyzed,
630        "risk_level": format!("{:?}", result.risk_level),
631        "mean": result.mean,
632        "std_dev": result.std_dev,
633        "variance": result.variance,
634        "skewness": result.skewness,
635        "kurtosis": result.kurtosis,
636        "shapiro_wilk": {
637            "statistic": result.shapiro_wilk_statistic,
638            "p_value": result.shapiro_wilk_p_value
639        },
640        "anderson_darling": {
641            "statistic": result.anderson_darling_statistic,
642            "p_value": result.anderson_darling_p_value
643        },
644        "kolmogorov_smirnov": {
645            "statistic": result.kolmogorov_smirnov_statistic,
646            "p_value": result.kolmogorov_smirnov_p_value
647        },
648        "normality_score": result.normality_score,
649        "qq_correlation": result.qq_correlation,
650        "distribution_quality": result.distribution_quality,
651        "outliers": {
652            "z_score_count": result.outliers_z_score.len(),
653            "modified_z_count": result.outliers_modified_z.len(),
654            "iqr_count": result.outliers_iqr.len()
655        },
656        "confidence_intervals": {
657            "mean_95": result.mean_confidence_interval,
658            "prediction_95": result.prediction_interval_95,
659            "three_sigma": result.three_sigma_limits
660        },
661        "sigma_coverage": {
662            "within_1_sigma": result.within_1_sigma_percent,
663            "within_2_sigma": result.within_2_sigma_percent,
664            "within_3_sigma": result.within_3_sigma_percent
665        }
666    });
667
668    println!("{}", serde_json::to_string_pretty(&output).unwrap());
669}
670
671fn print_csv_output(result: &NormalResult) {
672    println!("dataset,numbers_analyzed,risk_level,mean,std_dev,variance,skewness,kurtosis,normality_score");
673    println!(
674        "{},{},{:?},{:.3},{:.3},{:.3},{:.3},{:.3},{:.3}",
675        result.dataset_name,
676        result.numbers_analyzed,
677        result.risk_level,
678        result.mean,
679        result.std_dev,
680        result.variance,
681        result.skewness,
682        result.kurtosis,
683        result.normality_score
684    );
685}
686
687fn print_yaml_output(result: &NormalResult) {
688    println!("dataset: \"{}\"", result.dataset_name);
689    println!("numbers_analyzed: {}", result.numbers_analyzed);
690    println!("risk_level: \"{:?}\"", result.risk_level);
691    println!("mean: {:.3}", result.mean);
692    println!("std_dev: {:.3}", result.std_dev);
693    println!("variance: {:.3}", result.variance);
694    println!("skewness: {:.3}", result.skewness);
695    println!("kurtosis: {:.3}", result.kurtosis);
696    println!("normality_score: {:.3}", result.normality_score);
697}
698
699fn print_toml_output(result: &NormalResult) {
700    println!("dataset = \"{}\"", result.dataset_name);
701    println!("numbers_analyzed = {}", result.numbers_analyzed);
702    println!("risk_level = \"{:?}\"", result.risk_level);
703    println!("mean = {:.3}", result.mean);
704    println!("std_dev = {:.3}", result.std_dev);
705    println!("variance = {:.3}", result.variance);
706    println!("skewness = {:.3}", result.skewness);
707    println!("kurtosis = {:.3}", result.kurtosis);
708    println!("normality_score = {:.3}", result.normality_score);
709}
710
711fn print_xml_output(result: &NormalResult) {
712    println!("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
713    println!("<normal_analysis>");
714    println!("  <dataset>{}</dataset>", result.dataset_name);
715    println!(
716        "  <numbers_analyzed>{}</numbers_analyzed>",
717        result.numbers_analyzed
718    );
719    println!("  <risk_level>{:?}</risk_level>", result.risk_level);
720    println!("  <mean>{:.3}</mean>", result.mean);
721    println!("  <std_dev>{:.3}</std_dev>", result.std_dev);
722    println!("  <variance>{:.3}</variance>", result.variance);
723    println!("  <skewness>{:.3}</skewness>", result.skewness);
724    println!("  <kurtosis>{:.3}</kurtosis>", result.kurtosis);
725    println!(
726        "  <normality_score>{:.3}</normality_score>",
727        result.normality_score
728    );
729    println!("</normal_analysis>");
730}
731
732/// Analyze numbers with filtering and custom options
733fn analyze_numbers_with_options(
734    matches: &clap::ArgMatches,
735    dataset_name: String,
736    numbers: &[f64],
737) -> Result<NormalResult> {
738    // Apply number filtering if specified
739    let filtered_numbers = if let Some(filter_str) = matches.get_one::<String>("filter") {
740        let filter = NumberFilter::parse(filter_str)
741            .map_err(|e| BenfError::ParseError(format!("無効なフィルタ: {e}")))?;
742
743        let filtered = apply_number_filter(numbers, &filter);
744
745        // Inform user about filtering results
746        if filtered.len() != numbers.len() {
747            eprintln!(
748                "フィルタリング結果: {} 個の数値が {} 個に絞り込まれました ({})",
749                numbers.len(),
750                filtered.len(),
751                filter.description()
752            );
753        }
754
755        filtered
756    } else {
757        numbers.to_vec()
758    };
759
760    // Parse minimum count requirement
761    let min_count = if let Some(min_count_str) = matches.get_one::<String>("min-count") {
762        min_count_str
763            .parse::<usize>()
764            .map_err(|_| BenfError::ParseError("無効な最小数値数".to_string()))?
765    } else {
766        8 // 正規分布分析では最低8個必要
767    };
768
769    // Check minimum count requirement
770    if filtered_numbers.len() < min_count {
771        return Err(BenfError::InsufficientData(filtered_numbers.len()));
772    }
773
774    // Perform normal distribution analysis
775    analyze_normal_distribution(&filtered_numbers, &dataset_name)
776}
777
778/// 高度な異常値検出結果の出力
779fn output_advanced_outlier_result(_matches: &ArgMatches, result: &AdvancedOutlierResult) {
780    println!("Advanced Outlier Detection Result: {}", result.method_name);
781    println!("Detection rate: {:.3}", result.detection_rate);
782    println!("Threshold: {:.3}", result.threshold);
783    println!("Outliers found: {}", result.outliers.len());
784
785    if !result.outliers.is_empty() {
786        println!("\nOutlier Details:");
787        for outlier in &result.outliers {
788            println!(
789                "  Index {}: Value={:.3}, Score={:.3}, Confidence={:.3}",
790                outlier.index, outlier.value, outlier.outlier_score, outlier.confidence
791            );
792        }
793    }
794
795    if !result.method_params.is_empty() {
796        println!("\nMethod Parameters:");
797        for (param, value) in &result.method_params {
798            println!("  {param}: {value:.3}");
799        }
800    }
801}
802
/// Computes the population standard deviation of `numbers`.
///
/// The squared deviations from the mean are divided by the number of values
/// (not by `n - 1`). An empty slice yields `0.0`.
fn calculate_std_dev(numbers: &[f64]) -> f64 {
    match numbers.len() {
        0 => 0.0,
        count => {
            let n = count as f64;
            let mean = numbers.iter().sum::<f64>() / n;
            let squared_deviations = numbers
                .iter()
                .map(|value| (value - mean).powi(2))
                .sum::<f64>();
            (squared_deviations / n).sqrt()
        }
    }
}
813
814/// 時系列分析結果の出力
815fn output_timeseries_result(_matches: &ArgMatches, result: &TimeSeriesAnalysis) {
816    println!("Time Series Analysis Results");
817    println!("============================");
818
819    // トレンド分析
820    println!("\nTrend Analysis:");
821    println!("  Slope: {:.6}", result.trend.slope);
822    println!("  R-squared: {:.3}", result.trend.r_squared);
823    println!("  Direction: {:?}", result.trend.direction);
824    println!("  Trend strength: {:.3}", result.trend.trend_strength);
825
826    // 季節性
827    if result.seasonality.detected {
828        println!("\nSeasonality Detected:");
829        if let Some(period) = result.seasonality.period {
830            println!("  Period: {period:.1}");
831        }
832        println!("  Strength: {:.3}", result.seasonality.strength);
833    } else {
834        println!("\nNo significant seasonality detected");
835    }
836
837    // 変化点
838    if !result.changepoints.is_empty() {
839        println!("\nChange Points Detected: {}", result.changepoints.len());
840        for (i, cp) in result.changepoints.iter().enumerate().take(5) {
841            println!(
842                "  {}: Index {}, Significance: {:.2}, Type: {:?}",
843                i + 1,
844                cp.index,
845                cp.significance,
846                cp.change_type
847            );
848        }
849    }
850
851    // 予測
852    if !result.forecasts.is_empty() {
853        println!("\nForecasts (next {} points):", result.forecasts.len());
854        for (i, forecast) in result.forecasts.iter().enumerate() {
855            println!(
856                "  {}: {:.3} (uncertainty: {:.3})",
857                i + 1,
858                forecast.predicted_value,
859                forecast.uncertainty
860            );
861        }
862    }
863
864    // 異常値
865    if !result.anomalies.is_empty() {
866        println!("\nAnomalies Detected: {}", result.anomalies.len());
867        for anomaly in result.anomalies.iter().take(10) {
868            println!(
869                "  Index {}: Value={:.3}, Expected={:.3}, Score={:.3}",
870                anomaly.index, anomaly.value, anomaly.expected_value, anomaly.anomaly_score
871            );
872        }
873    }
874
875    // データ品質
876    println!("\nData Quality Assessment:");
877    println!(
878        "  Completeness: {:.1}%",
879        result.statistics.data_quality.completeness * 100.0
880    );
881    println!(
882        "  Consistency: {:.1}%",
883        result.statistics.data_quality.consistency * 100.0
884    );
885    println!(
886        "  Outlier ratio: {:.1}%",
887        result.statistics.data_quality.outlier_ratio * 100.0
888    );
889    println!("  Noise level: {:.3}", result.statistics.noise_level);
890}
891
892fn format_normal_histogram(result: &NormalResult) -> String {
893    let mut output = String::new();
894    const CHART_WIDTH: usize = 50;
895    const BINS: usize = 10;
896
897    // 仮想データでヒストグラムをシミュレート（実際のデータはNormalResultから取得不可）
898    // 平均、標準偏差を使って理論的な正規分布カーブを表示
899    let mean = result.mean;
900    let std_dev = result.std_dev;
901
902    // -3σから+3σの範囲でビンを作成
903    let range_start = mean - 3.0 * std_dev;
904    let range_end = mean + 3.0 * std_dev;
905    let bin_width = (range_end - range_start) / BINS as f64;
906
907    // 各ビンの理論的確率密度を計算
908    let mut bin_densities = Vec::new();
909    let mut max_density: f64 = 0.0;
910
911    for i in 0..BINS {
912        let bin_center = range_start + (i as f64 + 0.5) * bin_width;
913        let z_score = (bin_center - mean) / std_dev;
914
915        // 正規分布の確率密度関数
916        let density =
917            (-0.5_f64 * z_score * z_score).exp() / (std_dev * (2.0 * std::f64::consts::PI).sqrt());
918        bin_densities.push(density);
919        max_density = max_density.max(density);
920    }
921
922    // ヒストグラムを表示
923    for (i, &density) in bin_densities.iter().enumerate() {
924        let bin_start = range_start + i as f64 * bin_width;
925        let bin_end = bin_start + bin_width;
926
927        let normalized_density = if max_density > 0.0 {
928            density / max_density
929        } else {
930            0.0
931        };
932        let bar_length = (normalized_density * CHART_WIDTH as f64).round() as usize;
933        let bar_length = bar_length.min(CHART_WIDTH);
934
935        // Calculate theoretical expected density for this bin
936        let theoretical_density = density / max_density; // This is already the normalized theoretical density
937        let expected_line_pos = (theoretical_density * CHART_WIDTH as f64).round() as usize;
938        let expected_line_pos = expected_line_pos.min(CHART_WIDTH - 1);
939
940        // Create bar with filled portion, expected value line, and background
941        let mut bar_chars = Vec::new();
942        for pos in 0..CHART_WIDTH {
943            if pos == expected_line_pos {
944                bar_chars.push('┃'); // Expected value line (theoretical density)
945            } else if pos < bar_length {
946                bar_chars.push('█'); // Filled portion
947            } else {
948                bar_chars.push('░'); // Background portion
949            }
950        }
951        let full_bar: String = bar_chars.iter().collect();
952
953        output.push_str(&format!(
954            "{:6.2}-{:6.2}: {} {:>5.1}%\n",
955            bin_start,
956            bin_end,
957            full_bar,
958            normalized_density * 100.0
959        ));
960    }
961
962    // 統計情報を追加
963    output.push_str(&format!(
964        "\nDistribution: μ={mean:.2}, σ={std_dev:.2}, Range: [{range_start:.2}, {range_end:.2}]"
965    ));
966
967    // σ範囲の情報
968    output.push_str(&format!(
969        "\n1σ: {:.1}%, 2σ: {:.1}%, 3σ: {:.1}%",
970        result.within_1_sigma_percent, result.within_2_sigma_percent, result.within_3_sigma_percent
971    ));
972
973    output
974}
lawkit_python/subcommands/normal.rs

lawkit_python/subcommands/
normal.rs