1use clap::{Arg, Command};
2use lawkit_core::common::{memory::MemoryConfig, parallel::ParallelConfig};
3
4pub fn add_common_options(cmd: Command) -> Command {
6 cmd.arg(
7 Arg::new("format")
8 .long("format")
9 .short('f')
10 .value_name("FORMAT")
11 .help("Output format: text, csv, json, yaml, toml, xml")
12 .default_value("text"),
13 )
14 .arg(
15 Arg::new("quiet")
16 .long("quiet")
17 .short('q')
18 .help("Minimal output")
19 .action(clap::ArgAction::SetTrue),
20 )
21 .arg(
22 Arg::new("verbose")
23 .long("verbose")
24 .short('v')
25 .help("Detailed output")
26 .action(clap::ArgAction::SetTrue),
27 )
28 .arg(
29 Arg::new("filter")
30 .long("filter")
31 .value_name("RANGE")
32 .help("Filter numbers by range (e.g., >=100, <1000, 50-500)"),
33 )
34 .arg(
35 Arg::new("min-count")
36 .long("min-count")
37 .short('c')
38 .value_name("NUMBER")
39 .help("Minimum number of data points required for analysis")
40 .default_value("10"), )
42 .arg(
43 Arg::new("no-color")
44 .long("no-color")
45 .help("Disable colored output")
46 .action(clap::ArgAction::SetTrue),
47 )
48}
49
50pub fn add_generate_common_options(cmd: Command) -> Command {
52 cmd.arg(
53 Arg::new("quiet")
54 .long("quiet")
55 .short('q')
56 .help("Suppress progress messages")
57 .action(clap::ArgAction::SetTrue),
58 )
59 .arg(
60 Arg::new("verbose")
61 .long("verbose")
62 .short('v')
63 .help("Show generation details")
64 .action(clap::ArgAction::SetTrue),
65 )
66 .arg(
67 Arg::new("no-color")
68 .long("no-color")
69 .help("Disable colored output")
70 .action(clap::ArgAction::SetTrue),
71 )
72}
73
74pub fn add_input_arg(cmd: Command) -> Command {
76 cmd.arg(
77 Arg::new("input")
78 .help("Input data (file path, URL, or '-' for stdin)")
79 .index(1),
80 )
81}
82
83pub fn add_benf_options(cmd: Command) -> Command {
85 cmd.arg(
86 Arg::new("threshold")
87 .long("threshold")
88 .short('t')
89 .value_name("LEVEL")
90 .help("Anomaly detection threshold: low, medium, high, critical")
91 .default_value("auto"),
92 )
93 .arg(
94 Arg::new("confidence")
95 .long("confidence")
96 .value_name("LEVEL")
97 .help("Statistical confidence level for tests (0.01-0.99)")
98 .default_value("0.95"),
99 )
100 .arg(
101 Arg::new("sample-size")
102 .long("sample-size")
103 .value_name("NUMBER")
104 .help("Maximum sample size for large datasets (improves performance)"),
105 )
106 .arg(
107 Arg::new("min-value")
108 .long("min-value")
109 .value_name("VALUE")
110 .help("Minimum value to include in analysis (filters small values that add noise)"),
111 )
112}
113
114pub fn add_pareto_options(cmd: Command) -> Command {
116 cmd.arg(
117 Arg::new("concentration")
118 .long("concentration")
119 .short('C')
120 .value_name("THRESHOLD")
121 .help("Concentration threshold (0.0-1.0)")
122 .default_value("0.8"),
123 )
124 .arg(
125 Arg::new("gini-coefficient")
126 .long("gini-coefficient")
127 .help("Calculate Gini coefficient for inequality measurement")
128 .action(clap::ArgAction::SetTrue),
129 )
130 .arg(
131 Arg::new("percentiles")
132 .long("percentiles")
133 .value_name("PERCENTILES")
134 .help("Custom percentiles to calculate (e.g., 70,80,90)"),
135 )
136 .arg(
137 Arg::new("business-analysis")
138 .long("business-analysis")
139 .help("Enable business analysis insights")
140 .action(clap::ArgAction::SetTrue),
141 )
142}
143
144pub fn add_zipf_options(cmd: Command) -> Command {
146 cmd.arg(
147 Arg::new("text")
148 .long("text")
149 .short('T')
150 .help("Enable text analysis mode")
151 .action(clap::ArgAction::SetTrue),
152 )
153 .arg(
154 Arg::new("words")
155 .long("words")
156 .short('w')
157 .value_name("NUMBER")
158 .help("Maximum number of words to analyze in text mode")
159 .default_value("1000"),
160 )
161}
162
163pub fn add_normal_options(cmd: Command) -> Command {
165 cmd
166 .arg(
167 Arg::new("test")
168 .long("test")
169 .short('T')
170 .value_name("METHOD")
171 .help("Normality test method: shapiro, anderson, ks, all")
172 .default_value("all"),
173 )
174 .arg(
175 Arg::new("outliers")
176 .long("outliers")
177 .short('O')
178 .help("Enable outlier detection")
179 .action(clap::ArgAction::SetTrue),
180 )
181 .arg(
182 Arg::new("outlier-method")
183 .long("outlier-method")
184 .value_name("METHOD")
185 .help("Outlier detection method: zscore, modified_zscore, iqr, lof, isolation, dbscan, ensemble")
186 .default_value("zscore"),
187 )
188 .arg(
189 Arg::new("quality-control")
190 .long("quality-control")
191 .short('Q')
192 .help("Enable quality control analysis")
193 .action(clap::ArgAction::SetTrue),
194 )
195 .arg(
196 Arg::new("spec-limits")
197 .long("spec-limits")
198 .value_name("LOWER,UPPER")
199 .help("Specification limits for quality control (e.g., 9.5,10.5)"),
200 )
201 .arg(
202 Arg::new("enable-timeseries")
203 .long("enable-timeseries")
204 .help("Enable time series analysis")
205 .action(clap::ArgAction::SetTrue),
206 )
207 .arg(
208 Arg::new("timeseries-window")
209 .long("timeseries-window")
210 .value_name("SIZE")
211 .help("Time series analysis window size")
212 .default_value("10"),
213 )
214}
215
216pub fn add_poisson_options(cmd: Command) -> Command {
218 cmd.arg(
219 Arg::new("test")
220 .long("test")
221 .short('T')
222 .value_name("METHOD")
223 .help("Goodness-of-fit test method: chi_square, ks, variance, all")
224 .default_value("all"),
225 )
226 .arg(
227 Arg::new("predict")
228 .long("predict")
229 .short('p')
230 .help("Enable probability prediction")
231 .action(clap::ArgAction::SetTrue),
232 )
233 .arg(
234 Arg::new("max-events")
235 .long("max-events")
236 .value_name("NUMBER")
237 .help("Maximum number of events for analysis")
238 .default_value("20"),
239 )
240 .arg(
241 Arg::new("rare-events")
242 .long("rare-events")
243 .short('R')
244 .help("Focus on rare event analysis")
245 .action(clap::ArgAction::SetTrue),
246 )
247 .arg(
248 Arg::new("confidence")
249 .long("confidence")
250 .value_name("LEVEL")
251 .help("Statistical confidence level for tests (0.01-0.99)")
252 .default_value("0.95"),
253 )
254}
255
256pub fn add_generate_options(cmd: Command) -> Command {
258 cmd.arg(
259 Arg::new("samples")
260 .long("samples")
261 .short('s')
262 .value_name("NUMBER")
263 .help("Number of samples to generate")
264 .default_value("1000"),
265 )
266 .arg(
267 Arg::new("seed")
268 .long("seed")
269 .value_name("NUMBER")
270 .help("Random seed for reproducible generation"),
271 )
272 .arg(
273 Arg::new("output-file")
274 .long("output-file")
275 .short('o')
276 .value_name("FILE")
277 .help("Output file path (default: stdout)"),
278 )
279 .arg(
280 Arg::new("fraud-rate")
281 .long("fraud-rate")
282 .value_name("RATE")
283 .help("Fraud injection rate (0.0-1.0) for testing")
284 .default_value("0.0"),
285 )
286}
287
288pub fn add_generate_benf_options(cmd: Command) -> Command {
290 cmd.arg(
291 Arg::new("range")
292 .long("range")
293 .value_name("MIN,MAX")
294 .help("Number range for generation (e.g., 1,10000)")
295 .default_value("1,100000"),
296 )
297}
298
299pub fn add_generate_pareto_options(cmd: Command) -> Command {
301 cmd.arg(
302 Arg::new("concentration")
303 .long("concentration")
304 .short('C')
305 .value_name("RATIO")
306 .help("Concentration ratio (0.0-1.0, default: 0.8 for 80/20)")
307 .default_value("0.8"),
308 )
309 .arg(
310 Arg::new("scale")
311 .long("scale")
312 .value_name("NUMBER")
313 .help("Scale parameter for Pareto distribution")
314 .default_value("1.0"),
315 )
316}
317
318pub fn add_generate_zipf_options(cmd: Command) -> Command {
320 cmd.arg(
321 Arg::new("exponent")
322 .long("exponent")
323 .short('e')
324 .value_name("NUMBER")
325 .help("Zipf exponent (default: 1.0)")
326 .default_value("1.0"),
327 )
328 .arg(
329 Arg::new("vocabulary-size")
330 .long("vocabulary-size")
331 .short('V')
332 .value_name("NUMBER")
333 .help("Vocabulary size for text generation")
334 .default_value("10000"),
335 )
336}
337
338pub fn add_generate_normal_options(cmd: Command) -> Command {
340 cmd.arg(
341 Arg::new("mean")
342 .long("mean")
343 .short('m')
344 .value_name("NUMBER")
345 .help("Mean of normal distribution")
346 .default_value("0.0"),
347 )
348 .arg(
349 Arg::new("stddev")
350 .long("stddev")
351 .short('d')
352 .value_name("NUMBER")
353 .help("Standard deviation of normal distribution")
354 .default_value("1.0"),
355 )
356}
357
358pub fn add_generate_poisson_options(cmd: Command) -> Command {
360 cmd.arg(
361 Arg::new("lambda")
362 .long("lambda")
363 .short('l')
364 .value_name("NUMBER")
365 .help("Lambda parameter (rate) for Poisson distribution")
366 .default_value("2.0"),
367 )
368 .arg(
369 Arg::new("time-series")
370 .long("time-series")
371 .short('T')
372 .help("Generate time-series event data")
373 .action(clap::ArgAction::SetTrue),
374 )
375}
376
377pub fn add_integration_options(cmd: Command) -> Command {
379 cmd.arg(
380 Arg::new("laws")
381 .long("laws")
382 .short('l') .help("Laws to analyze (benf,pareto,zipf,normal,poisson)")
384 .value_name("LAWS"),
385 )
386 .arg(
387 Arg::new("focus")
388 .long("focus")
389 .short('F') .help("Analysis focus area")
391 .value_name("FOCUS")
392 .value_parser(["quality", "concentration", "distribution", "anomaly"]),
393 )
394 .arg(
395 Arg::new("threshold")
396 .long("threshold")
397 .short('t')
398 .help("Analysis threshold for anomaly detection (0.0-1.0)")
399 .value_name("THRESHOLD")
400 .value_parser(clap::value_parser!(f64))
401 .default_value("0.5"),
402 )
403 .arg(
404 Arg::new("recommend")
405 .long("recommend")
406 .short('r')
407 .help("Enable recommendation mode")
408 .action(clap::ArgAction::SetTrue),
409 )
410 .arg(
411 Arg::new("report")
412 .long("report")
413 .help("Analysis report type")
414 .value_name("TYPE")
415 .value_parser(["summary", "detailed", "anomalies"])
416 .default_value("summary"),
417 )
418 .arg(
419 Arg::new("consistency-check")
420 .long("consistency-check")
421 .help("Enable consistency check")
422 .action(clap::ArgAction::SetTrue),
423 )
424 .arg(
425 Arg::new("cross-validation")
426 .long("cross-validation")
427 .help("Enable cross-validation analysis")
428 .action(clap::ArgAction::SetTrue),
429 )
430 .arg(
431 Arg::new("confidence-level")
432 .long("confidence-level")
433 .help("Confidence level")
434 .value_name("LEVEL")
435 .value_parser(clap::value_parser!(f64))
436 .default_value("0.95"),
437 )
438 .arg(
439 Arg::new("purpose")
440 .long("purpose")
441 .short('p')
442 .help("Analysis purpose")
443 .value_name("PURPOSE")
444 .value_parser([
445 "quality",
446 "fraud",
447 "concentration",
448 "anomaly",
449 "distribution",
450 "general",
451 ]),
452 )
453}
454
455pub fn setup_automatic_optimization_config() -> (ParallelConfig, MemoryConfig) {
457 let parallel_config = ParallelConfig {
459 num_threads: 0, chunk_size: 1000,
461 enable_parallel: true,
462 };
463 let memory_config = MemoryConfig {
464 chunk_size: 10000,
465 max_memory_mb: 512,
466 enable_streaming: true,
467 enable_compression: false,
468 };
469 (parallel_config, memory_config)
470}
471
/// Reads the entire input into a `String`.
///
/// Despite the name, this returns the content itself rather than a reader.
///
/// * `Some(path)` where `path` is not `"-"`: the file at `path` is read.
/// * `Some("-")` or `None`: standard input is read until end of input.
///
/// # Errors
///
/// Returns the underlying I/O error (boxed) if the file or standard input
/// cannot be read into a `String`.
pub fn get_optimized_reader(input: Option<&String>) -> Result<String, Box<dyn std::error::Error>> {
    use std::io::Read;

    match input {
        // A real path was supplied: read the whole file.
        Some(path) if path.as_str() != "-" => Ok(std::fs::read_to_string(path)?),
        // Either "-" or no argument at all: fall back to stdin.
        _ => {
            let mut contents = String::new();
            std::io::stdin().read_to_string(&mut contents)?;
            Ok(contents)
        }
    }
}