1use clap::{Arg, Command};
2use lawkit_core::common::{memory::MemoryConfig, parallel::ParallelConfig};
3
4pub fn add_common_options(cmd: Command) -> Command {
6 cmd.arg(
7 Arg::new("format")
8 .long("format")
9 .short('f')
10 .value_name("FORMAT")
11 .help("Output format: text, csv, json, yaml, toml, xml")
12 .default_value("text"),
13 )
14 .arg(
15 Arg::new("quiet")
16 .long("quiet")
17 .short('q')
18 .help("Minimal output")
19 .action(clap::ArgAction::SetTrue),
20 )
21 .arg(
22 Arg::new("verbose")
23 .long("verbose")
24 .short('v')
25 .help("Detailed output")
26 .action(clap::ArgAction::SetTrue),
27 )
28 .arg(
29 Arg::new("filter")
30 .long("filter")
31 .value_name("RANGE")
32 .help("Filter numbers by range (e.g., >=100, <1000, 50-500)"),
33 )
34 .arg(
35 Arg::new("min-count")
36 .long("min-count")
37 .short('c')
38 .value_name("NUMBER")
39 .help("Minimum number of data points required for analysis")
40 .default_value("10"), )
42}
43
44pub fn add_input_arg(cmd: Command) -> Command {
46 cmd.arg(
47 Arg::new("input")
48 .help("Input data (file path, URL, or '-' for stdin)")
49 .index(1),
50 )
51}
52
53pub fn add_benf_options(cmd: Command) -> Command {
55 cmd.arg(
56 Arg::new("threshold")
57 .long("threshold")
58 .short('t')
59 .value_name("LEVEL")
60 .help("Anomaly detection threshold: low, medium, high, critical")
61 .default_value("auto"),
62 )
63 .arg(
64 Arg::new("confidence")
65 .long("confidence")
66 .value_name("LEVEL")
67 .help("Statistical confidence level for tests (0.01-0.99)")
68 .default_value("0.95"),
69 )
70 .arg(
71 Arg::new("sample-size")
72 .long("sample-size")
73 .value_name("NUMBER")
74 .help("Maximum sample size for large datasets (improves performance)"),
75 )
76 .arg(
77 Arg::new("min-value")
78 .long("min-value")
79 .value_name("VALUE")
80 .help("Minimum value to include in analysis (filters small values that add noise)"),
81 )
82}
83
84pub fn add_pareto_options(cmd: Command) -> Command {
86 cmd.arg(
87 Arg::new("concentration")
88 .long("concentration")
89 .short('C')
90 .value_name("THRESHOLD")
91 .help("Concentration threshold (0.0-1.0)")
92 .default_value("0.8"),
93 )
94 .arg(
95 Arg::new("gini-coefficient")
96 .long("gini-coefficient")
97 .help("Calculate Gini coefficient for inequality measurement")
98 .action(clap::ArgAction::SetTrue),
99 )
100 .arg(
101 Arg::new("percentiles")
102 .long("percentiles")
103 .value_name("PERCENTILES")
104 .help("Custom percentiles to calculate (e.g., 70,80,90)"),
105 )
106 .arg(
107 Arg::new("business-analysis")
108 .long("business-analysis")
109 .help("Enable business analysis insights")
110 .action(clap::ArgAction::SetTrue),
111 )
112}
113
114pub fn add_zipf_options(cmd: Command) -> Command {
116 cmd.arg(
117 Arg::new("text")
118 .long("text")
119 .short('T')
120 .help("Enable text analysis mode")
121 .action(clap::ArgAction::SetTrue),
122 )
123 .arg(
124 Arg::new("words")
125 .long("words")
126 .short('w')
127 .value_name("NUMBER")
128 .help("Maximum number of words to analyze in text mode")
129 .default_value("1000"),
130 )
131}
132
133pub fn add_normal_options(cmd: Command) -> Command {
135 cmd
136 .arg(
137 Arg::new("test")
138 .long("test")
139 .short('T')
140 .value_name("METHOD")
141 .help("Normality test method: shapiro, anderson, ks, all")
142 .default_value("all"),
143 )
144 .arg(
145 Arg::new("outliers")
146 .long("outliers")
147 .short('O')
148 .help("Enable outlier detection")
149 .action(clap::ArgAction::SetTrue),
150 )
151 .arg(
152 Arg::new("outlier-method")
153 .long("outlier-method")
154 .value_name("METHOD")
155 .help("Outlier detection method: zscore, modified_zscore, iqr, lof, isolation, dbscan, ensemble")
156 .default_value("zscore"),
157 )
158 .arg(
159 Arg::new("quality-control")
160 .long("quality-control")
161 .short('Q')
162 .help("Enable quality control analysis")
163 .action(clap::ArgAction::SetTrue),
164 )
165 .arg(
166 Arg::new("spec-limits")
167 .long("spec-limits")
168 .value_name("LOWER,UPPER")
169 .help("Specification limits for quality control (e.g., 9.5,10.5)"),
170 )
171 .arg(
172 Arg::new("enable-timeseries")
173 .long("enable-timeseries")
174 .help("Enable time series analysis")
175 .action(clap::ArgAction::SetTrue),
176 )
177 .arg(
178 Arg::new("timeseries-window")
179 .long("timeseries-window")
180 .value_name("SIZE")
181 .help("Time series analysis window size")
182 .default_value("10"),
183 )
184}
185
186pub fn add_poisson_options(cmd: Command) -> Command {
188 cmd.arg(
189 Arg::new("test")
190 .long("test")
191 .short('T')
192 .value_name("METHOD")
193 .help("Goodness-of-fit test method: chi_square, ks, variance, all")
194 .default_value("all"),
195 )
196 .arg(
197 Arg::new("predict")
198 .long("predict")
199 .short('p')
200 .help("Enable probability prediction")
201 .action(clap::ArgAction::SetTrue),
202 )
203 .arg(
204 Arg::new("max-events")
205 .long("max-events")
206 .value_name("NUMBER")
207 .help("Maximum number of events for analysis")
208 .default_value("20"),
209 )
210 .arg(
211 Arg::new("rare-events")
212 .long("rare-events")
213 .short('R')
214 .help("Focus on rare event analysis")
215 .action(clap::ArgAction::SetTrue),
216 )
217 .arg(
218 Arg::new("confidence")
219 .long("confidence")
220 .value_name("LEVEL")
221 .help("Statistical confidence level for tests (0.01-0.99)")
222 .default_value("0.95"),
223 )
224}
225
226pub fn add_generate_options(cmd: Command) -> Command {
228 cmd.arg(
229 Arg::new("samples")
230 .long("samples")
231 .short('s')
232 .value_name("NUMBER")
233 .help("Number of samples to generate")
234 .default_value("1000"),
235 )
236 .arg(
237 Arg::new("seed")
238 .long("seed")
239 .value_name("NUMBER")
240 .help("Random seed for reproducible generation"),
241 )
242 .arg(
243 Arg::new("output-file")
244 .long("output-file")
245 .short('o')
246 .value_name("FILE")
247 .help("Output file path (default: stdout)"),
248 )
249 .arg(
250 Arg::new("fraud-rate")
251 .long("fraud-rate")
252 .value_name("RATE")
253 .help("Fraud injection rate (0.0-1.0) for testing")
254 .default_value("0.0"),
255 )
256}
257
258pub fn add_generate_benf_options(cmd: Command) -> Command {
260 cmd.arg(
261 Arg::new("range")
262 .long("range")
263 .value_name("MIN,MAX")
264 .help("Number range for generation (e.g., 1,10000)")
265 .default_value("1,100000"),
266 )
267}
268
269pub fn add_generate_pareto_options(cmd: Command) -> Command {
271 cmd.arg(
272 Arg::new("concentration")
273 .long("concentration")
274 .short('C')
275 .value_name("RATIO")
276 .help("Concentration ratio (0.0-1.0, default: 0.8 for 80/20)")
277 .default_value("0.8"),
278 )
279 .arg(
280 Arg::new("scale")
281 .long("scale")
282 .value_name("NUMBER")
283 .help("Scale parameter for Pareto distribution")
284 .default_value("1.0"),
285 )
286}
287
288pub fn add_generate_zipf_options(cmd: Command) -> Command {
290 cmd.arg(
291 Arg::new("exponent")
292 .long("exponent")
293 .short('e')
294 .value_name("NUMBER")
295 .help("Zipf exponent (default: 1.0)")
296 .default_value("1.0"),
297 )
298 .arg(
299 Arg::new("vocabulary-size")
300 .long("vocabulary-size")
301 .short('V')
302 .value_name("NUMBER")
303 .help("Vocabulary size for text generation")
304 .default_value("10000"),
305 )
306}
307
308pub fn add_generate_normal_options(cmd: Command) -> Command {
310 cmd.arg(
311 Arg::new("mean")
312 .long("mean")
313 .short('m')
314 .value_name("NUMBER")
315 .help("Mean of normal distribution")
316 .default_value("0.0"),
317 )
318 .arg(
319 Arg::new("stddev")
320 .long("stddev")
321 .short('d')
322 .value_name("NUMBER")
323 .help("Standard deviation of normal distribution")
324 .default_value("1.0"),
325 )
326}
327
328pub fn add_generate_poisson_options(cmd: Command) -> Command {
330 cmd.arg(
331 Arg::new("lambda")
332 .long("lambda")
333 .short('l')
334 .value_name("NUMBER")
335 .help("Lambda parameter (rate) for Poisson distribution")
336 .default_value("2.0"),
337 )
338 .arg(
339 Arg::new("time-series")
340 .long("time-series")
341 .short('T')
342 .help("Generate time-series event data")
343 .action(clap::ArgAction::SetTrue),
344 )
345}
346
347pub fn add_integration_options(cmd: Command) -> Command {
349 cmd.arg(
350 Arg::new("laws")
351 .long("laws")
352 .short('l') .help("Laws to analyze (benf,pareto,zipf,normal,poisson)")
354 .value_name("LAWS"),
355 )
356 .arg(
357 Arg::new("focus")
358 .long("focus")
359 .short('F') .help("Analysis focus area")
361 .value_name("FOCUS")
362 .value_parser(["quality", "concentration", "distribution", "anomaly"]),
363 )
364 .arg(
365 Arg::new("threshold")
366 .long("threshold")
367 .short('t')
368 .help("Analysis threshold for anomaly detection (0.0-1.0)")
369 .value_name("THRESHOLD")
370 .value_parser(clap::value_parser!(f64))
371 .default_value("0.5"),
372 )
373 .arg(
374 Arg::new("recommend")
375 .long("recommend")
376 .short('r')
377 .help("Enable recommendation mode")
378 .action(clap::ArgAction::SetTrue),
379 )
380 .arg(
381 Arg::new("report")
382 .long("report")
383 .help("Analysis report type")
384 .value_name("TYPE")
385 .value_parser(["summary", "detailed", "anomalies"])
386 .default_value("summary"),
387 )
388 .arg(
389 Arg::new("consistency-check")
390 .long("consistency-check")
391 .help("Enable consistency check")
392 .action(clap::ArgAction::SetTrue),
393 )
394 .arg(
395 Arg::new("cross-validation")
396 .long("cross-validation")
397 .help("Enable cross-validation analysis")
398 .action(clap::ArgAction::SetTrue),
399 )
400 .arg(
401 Arg::new("confidence-level")
402 .long("confidence-level")
403 .help("Confidence level")
404 .value_name("LEVEL")
405 .value_parser(clap::value_parser!(f64))
406 .default_value("0.95"),
407 )
408 .arg(
409 Arg::new("purpose")
410 .long("purpose")
411 .short('p')
412 .help("Analysis purpose")
413 .value_name("PURPOSE")
414 .value_parser([
415 "quality",
416 "fraud",
417 "concentration",
418 "anomaly",
419 "distribution",
420 "general",
421 ]),
422 )
423}
424
425pub fn setup_automatic_optimization_config() -> (ParallelConfig, MemoryConfig) {
427 let parallel_config = ParallelConfig {
429 num_threads: 0, chunk_size: 1000,
431 enable_parallel: true,
432 };
433 let memory_config = MemoryConfig {
434 chunk_size: 10000,
435 max_memory_mb: 512,
436 enable_streaming: true,
437 enable_compression: false,
438 };
439 (parallel_config, memory_config)
440}
441
/// Reads the whole input into a `String`.
///
/// * `Some(path)` where `path` is not `"-"`: the file at `path` is read with
///   `std::fs::read_to_string`.
/// * `Some("-")` or `None`: standard input is read until end of input.
///
/// # Errors
///
/// Returns the boxed I/O error if the file or standard input cannot be read
/// into a `String`.
pub fn get_optimized_reader(input: Option<&String>) -> Result<String, Box<dyn std::error::Error>> {
    use std::io::Read;

    match input {
        // A real path: let the standard library read the file in one call.
        Some(path) if path.as_str() != "-" => Ok(std::fs::read_to_string(path)?),
        // Either an explicit "-" or no input at all: read stdin instead.
        _ => {
            let mut contents = String::new();
            std::io::stdin().read_to_string(&mut contents)?;
            Ok(contents)
        }
    }
}