// rsv_lib/utils/cmd_desc.rs

/// Help text for the `rsv size` command (description, usage, arguments, options).
pub const SIZE_DESC: &str = r#"
Show filesize of a file.

Usage: rsv size [FILENAME]

Arguments:
  [FILENAME]  File to open

Options:
  -h, --help  Print help
"#;
12
/// Help text for the `rsv count` command (description, usage examples, arguments, options).
pub const COUNT_DESC: &str = r#"
Count the number of lines in a file. When supplemented with a directory, 
the command counts the number of files in the directory.

Usage: 
  rsv.exe count [OPTIONS] <FILENAME>
  rsv count data.csv                 # data
  rsv count --no-header data.csv     # no header
  rsv count directory                # directory
  rsv count EXCEL.xlsx               # EXCEL file

Arguments:
  <FILENAME>  File to open

Options:
      --no-header      Whether the file has a header
  -S, --sheet <SHEET>  Get the nth worksheet of Excel file [default: 0]
  -h, --help           Print help information (use `--help` for more detail)
"#;
32
/// Help text for the `rsv headers` command (description, usage examples, arguments, options).
pub const HEADER_DESC: &str = r#"
Show file headers. 

Usage: 
  rsv.exe headers [OPTIONS] <FILENAME>
  rsv headers data.csv
  rsv headers -s \t data.csv          # tab separator
  rsv headers --sheet 0 data.xlsx     # first sheet of Excel file
  rsv headers --sheet 1 data.xlsx     # second sheet

Arguments:
  <FILENAME>           File to open, e.g., CSV, TXT, and EXCEL

Options:
  -s, --sep <SEP>      Field separator [default: ,]
  -q, --quote <QUOTE>  Quote char [default: "]
  -S, --sheet <SHEET>  Get the nth worksheet of EXCEL file [default: 0] 
  -h, --help           Print help information
"#;
52
/// Help text for the `rsv head` command (description, usage examples, arguments, options).
pub const HEAD_DESC: &str = r#"
Show head n lines of file. The result could be formatted in an aligned table 
by chaining with the <rsv table> command, e.g., rsv head data.csv | rsv table.

Usage: 
  rsv.exe head [OPTIONS] <FILENAME>
  rsv head data.csv                   # default to show head 10
  rsv head -n 5 data.csv              # show head 5
  rsv head data.xlsx                  # EXCEL file, default to first sheet
  rsv head --sheet 1 data.xlsx        # second sheet

Arguments:
  <FILENAME>           File to open, e.g., CSV, TXT, EXCEL or OTHERS

Options:
      --no-header      Whether the file has a header
  -n, --n <N>          Number of records to show [default: 10]
  -S, --sheet <SHEET>  Get the nth worksheet of EXCEL file [default: 0]
  -h, --help           Print help information
"#;
73
/// Help text for the `rsv tail` command (description, usage examples, arguments, options).
// NOTE(review): the export file name below was "current-file-head.csv", which looks
// copied from the head command; changed to "-tail" — confirm against the exporter.
pub const TAIL_DESC: &str = r#"
Show tail n lines of file.

Usage: 
  rsv tail [OPTIONS] [FILENAME]
  rsv tail data.csv               # default to show tail 10 records
  rsv tail -n 5 data.csv          # tail 5 records
  rsv tail data.xlsx              # EXCEL file, default to first sheet
  rsv tail --sheet 1 data.xlsx    # the second sheet

Arguments:
  [FILENAME]           File to open

Options:
      --no-header      Whether the file has a header
  -n, --n <N>          Number of records to show [default: 10]
  -S, --sheet <SHEET>  Get the nth worksheet of EXCEL file [default: 0]
  -E, --export         Export to a file named current-file-tail.csv?
  -h, --help           Print help
"#;
94
/// Help text for the `rsv estimate` command (description, usage examples, arguments, options).
pub const ESTIMATE_DESC: &str = r#"
Fast estimate the number of lines in a file. The command first read 20000 lines 
(except the header) from the file, and then estimate average bytes of a line. 
The total number of lines of file is estimated according to file size and average
bytes per line.

Usage: 
  rsv.exe estimate <FILENAME>
  rsv.exe estimate data.csv
  rsv.exe estimate data.xlsx

Arguments:
  <FILENAME>  File to open

Options:
  -S, --sheet <SHEET>  Get the nth worksheet for an Excel file [default: 0]
  -h, --help           Print help information 
"#;
113
/// Help text for the `rsv clean` command (description, usage examples, arguments, options).
pub const CLEAN_DESC: &str = r#"
Clean file with escape chars (e.g. "). Other special strings can also be cleaned. 
EXCEL files are not supported.

Usage: 
  rsv.exe clean [OPTIONS] <FILENAME>
  rsv clean data.csv                             # default to clean escape char "
  rsv clean -e "content-to-delete" data.csv      # clean str to empty
  rsv clean -o new-file.csv data.csv             # save to new-file.csv

Arguments:
  <FILENAME>  File to open

Options:
  -o, --output <F>       Output file, default to current-file-cleaned.csv
  -e, --escape <ESCAPE>  Escape char to clean [default: "]
  -h, --help             Print help information
"#;
132
/// Help text for the `rsv flatten` command (description, usage examples, arguments, options).
// The `r##` delimiter is required because the text itself contains the sequence `"#`.
pub const FLATTEN_DESC: &str = r##"
Prints flattened records to view them one by one. Records are separated
by "#", which could be changed with the --delimiter flag. The command is 
similar to "xsv flatten" command and "\G" command in Mysql.

Usage: 
  rsv.exe flatten [OPTIONS] <FILENAME>
  rsv flatten data.csv                       # default to show first 5 records
  rsv flatten -n 50 data.csv                 # show 50 records
  rsv flatten --delimiter "--" data.csv      # change delimiter to anything
  rsv flatten data.xlsx                      # EXCEL, default to first sheet
  rsv flatten --sheet 1 data.xlsx            # EXCEL, second sheet

Arguments:
  <FILENAME>                   File to open, CSV, TXT, or EXCEL

Options:
  -s, --sep <SEP>              Separator [default: ,]
  -q, --quote <QUOTE>          Quote Char [default: "]
      --no-header              Whether the file has a header
  -d, --delimiter <DELIMITER>  Line delimiter for printing [default: #]
  -n, --n <N>                  Number of records to show, n=-1 to show all [default: 5]
  -S, --sheet <SHEET>          Get the nth worksheet of EXCEL file [default: 0]
  -h, --help                   Print help information
"##;
158
/// Help text for the `rsv slice` command (description, usage examples, arguments, options).
pub const SLICE_DESC: &str = r#"
Extract a slice of rows from CSV, TXT or EXCEL file. The range is specified 
as [start, end). If start is omitted, the slice starts from the first 
record of CSV. If end is omitted, the slice expands to the last record of CSV.

Slice length can also be specified by the --length flag. When a length is 
specified, the end index is ignored.

A single data record can be retrieved using the --index (shortened as -i) flag.
When -i is specified, other flags (including start, end, length) are ignored.

Usage: 
  rsv slice [OPTIONS] <FILENAME>
  rsv slice -s 100 -e 150 data.csv           # set start and end
  rsv slice -s 100 -l 50 data.csv            # set start and length
  rsv slice -s 100 -l 50 --export data.csv   # export to data-slice.csv
  rsv slice -e 10 --export data.csv          # set end and export data
  rsv slice -i 9 data.csv                    # the 10th line only
  rsv slice -i 9 data.xlsx                   # EXCEL file

Arguments:
  <FILENAME>             File to open, including CSV, TXT, and EXCEL

Options:
  -s, --start <START>    Start index of file [default: 0]
  -e, --end <END>        End index of file
  -l, --length <LENGTH>  Slice length
  -i, --index <INDEX>    Index for a single record
      --no-header        Whether the file has a header
  -E, --export           Export data to current-file-slice.csv
  -S, --sheet <SHEET>    Get the nth worksheet of EXCEL file [default: 0]
  -h, --help             Print help information
"#;
192
/// Help text for the `rsv frequency` command (description, usage examples, arguments,
/// options, and the column selection syntax).
// `-a/--ascending` is listed below without a value placeholder, so the usage
// example passes it bare rather than as `-a 10`.
pub const FREQUENCY_DESC: &str = r#"
Frequency table for one or multiple columns. Performance is optimized through parallel analyzing.

Usage: 
  rsv.exe frequency [OPTIONS] <FILENAME>
  rsv frequency data.csv                   # default to the first column, descending order
  rsv frequency -c 0,1,2,5 data.csv        # columns 0, 1, 2, and 5
  rsv frequency -c 0-2,5 data.csv          # same as above
  rsv frequency -c 0-2 --export data.csv   # export result to data-frequency.csv
  rsv frequency -n 10 data.csv             # keep top 10 frequent items
  rsv frequency -a data.csv                # in ascending order
  rsv frequency data.xlsx                  # EXCEL file

Arguments:
  <FILENAME>           File to open

Options:
  -s, --sep <SEP>      Separator [default: ,]
  -q, --quote <QUOTE>  Quote Char [default: "]
      --no-header      Whether the file has a header
  -c, --cols <COLS>    Columns to generate frequency table [default: 0]
  -a, --ascending      Show frequency table in ascending order
  -E, --export         Export result to a frequency.csv file
  -n, --n <N>          Top N to keep in frequency table [default: -1]
  -S, --sheet <SHEET>  Work for the nth worksheet of EXCEL file [default: 0]
  -h, --help           Print help information

Column selection syntax:
  -c 0,1,2,5    -->    cols [0,1,2,5]
  -c 0-2,5      -->    same as cols [0,1,2,5]
  -c -1         -->    last column
  -c -2--1      -->    last two columns
"#;
226
/// Help text for the `rsv split` command (description, usage examples, arguments, and
/// the options for sequential and column-based splitting).
// `--size` is documented once, under the sequential split options.
pub const SPLIT_DESC: &str = r#"
Split a large, unordered file into separate files in two ways:
1. Sequentially: Use --size to specify the number of rows in a file;
2. Based on a Column Value: Use --col or -c flag to specify a column.

When splitting based on a column value, the output directory will be automatically 
created within the current data directory. Each small file will be named after the
unique values found in the specified column.

Following procedures are implemented to enhance performance:
1. Data is analyzed in chunks (default size: 50MB) rather than line by line, minimizing
the overhead of repeatedly opening and closing small files.
2. Chunked data is processed in parallel using the Rayon library.

By default, the tool splits the file based on the values of first column.

Usage: 
  rsv.exe split [OPTIONS] <FILENAME>
  rsv split data.csv               # default to split based on first column 
  rsv split -c 1 data.csv          # second column to split
  rsv split -c 0 -s \t data.csv    # first column, \t separator
  rsv split data.xlsx              # EXCEL file
  rsv split --size 1000 data.xlsx  # sequential split, 1000 records per file.

Arguments:
  <FILENAME>           File to open

Sequential Split Options: 
      --size <SIZE>    Number of records to write in each separate file

Column-based Split Options: 
  -s, --sep <SEP>      Separator [default: ,]
  -q, --quote <QUOTE>  Quote Char [default: "]
      --no-header      Whether the file has a header
  -c, --col <COL>      Column to split upon [default: 0]
  -S, --sheet <SHEET>  Get the nth worksheet of EXCEL file [default: 0]
  -h, --help           Print help information
"#;
266
/// Help text for the `rsv select` command (description, usage examples, arguments,
/// options, and the filter / math expression / column selection syntaxes).
pub const SELECT_DESC: &str = r#"
Select rows and columns by filter. Row and column filter syntaxes
are listed as below. Output can be exported with the --export flag.

Usage: 
  rsv.exe select [OPTIONS] <FILENAME>
  rsv select -f 0=a,b,c data.csv          # filter first column valued as a, b, or c
  rsv select -f "0N>10&1=c" data.csv      # first column > 10 numerically, AND the second column equals c
  rsv select -f 0!= --export data.csv     # first column is not empty, and export result
  rsv select -f 0=a,b data.xlsx           # apply to EXCEL file
  rsv select -f "0>@1-10*(@3+2)" data.csv # math calculation based on column 2 (@1) and column 4 (@3)
                                          # left column is treated as numeric automatically

Arguments:
  <FILENAME>                File to open

Options:
  -s, --sep <SEP>           Separator [default: ,]
  -q, --quote <QUOTE>       Quote Char [default: "]
      --no-header           Whether the file has a header
  -c, --cols <COLS>         Columns to select, see column select syntax below; Default to select ALL
  -f, --filter <FILTER>     Row filter, see row filter syntax below; Default to NONE
  -E, --export              Export results to a current-file-selected.csv file
  -S, --sheet <SHEET>       Get the nth worksheet of EXCEL file [default: 0]
  -h, --help                Print help information

Filter syntax, support =, !=, >, >=, <, <= and &:
  -f 0=a,b,c           -->  first column is a, b, or c
  -f 0N=1,2            -->  first column numerically equals to 1 or 2
  -f 0!=               -->  first column is not empty
  -f "0>=2022-01-21"   -->  first column equal to or bigger than 2022-01-21, lexicographically
  -f "0N>10"           -->  first column > 10 numerically
  -f "0N>10&2=pattern" -->  first column > 10 numerically, AND the third column equals to <pattern>

Math expression syntax (support +, -, *, /, %, ^, (, )):
  -f "0>@1 + 1"         -->   first column > second column plus one
  -f "0>=(@1+1)/(2^2)"  -->   first column >= (second column + 1) / (2 ^ 2)

Column selection syntax:
  -c 0,1,2,5          -->   cols [0,1,2,5]
  -c 0-2,5            -->   same as cols [0,1,2,5]
  -c -1               -->   last column
  -c -2--1            -->   last two columns
"#;
311
/// Help text for the `rsv stats` command (description, usage examples, arguments,
/// options, and the column selection syntax).
// NOTE(review): the export option previously named "current-file-selected.csv" while
// the usage example says "data-stats.csv"; aligned to "-stats" — confirm against the exporter.
pub const STATS_DESC: &str = r#"
Statistics for columns, including min, max, mean, unique, null. Within the command, 
columns are regarded as either an Int, Float or String. When the column is String, 
min, max, mean are ignored. When the column is Float, the unique stat is ignored.
The command processes data in batches and in parallel.

Usage: 
  rsv stats [OPTIONS] <FILENAME>
  rsv stats data.csv                       # all columns
  rsv stats -c 0,1 data.csv                # first two columns
  rsv stats -c 0,1 --export data.csv       # export statistics to data-stats.csv
  rsv stats -c 0,1 --export data.xlsx      # EXCEL file

Arguments:
  <FILENAME>             File to open, including CSV, TXT, and EXCEL

Options:
  -s, --sep <SEP>        Separator [default: ,]
      --no-header        Whether the file has a header
  -c, --cols <COLS>      Columns to generate statistics, Default to select all
  -E, --export           Export results to a file named current-file-stats.csv
  -S, --sheet <SHEET>    Get the nth worksheet of EXCEL file [default: 0]
  -h, --help             Print help information

Column selection syntax:
  -c 0,1,2,5       -->   cols [0,1,2,5]
  -c 0-2,5         -->   same as cols [0,1,2,5]
  -c -1            -->   last column
  -c -2--1         -->   last two columns
"#;
342
/// Help text for the `rsv excel2csv` command (description, usage examples, arguments, options).
pub const EXCEL2CSV_DESC: &str = r#"
Convert EXCEL to CSV.

Usage: 
  rsv.exe excel2csv [OPTIONS] <FILENAME>
  rsv excel2csv data.xlsx              # default to first sheet
  rsv excel2csv --sheet 1 data.xlsx    # second sheet

Arguments:
  <FILENAME>           File to open

Options:
  -S, --sheet <SHEET>  Get the nth worksheet of EXCEL file [default: 0]
  -s, --sep <SEP>      Separator [default: ,]
  -q, --quote <QUOTE>  Quote char [default: "]
  -h, --help           Print help information
"#;
360
/// Help text for the `rsv table` command (description, usage examples, options).
pub const TABLE_DESC: &str = r#"
Show data in an aligned table.

Usage: 
  rsv.exe table [OPTIONS]
  rsv head data.csv | rsv table                # convert result to an aligned table
  rsv slice -s 10 -e 15 data.csv | rsv table   # convert result to an aligned table

Options:
  -s, --sep <SEP>         Separator [default: ,]
  -q, --quote <QUOTE>     Quote char [default: "]
  -h, --help              Print help information
"#;
374
/// Help text for the `rsv search` command (description, usage examples, arguments,
/// options, and the column selection syntax).
pub const SEARCH_DESC: &str = r#"
Search file with regexes. Regex syntax is to be found at: 
https://docs.rs/regex/latest/regex/#syntax. The command reads file in chunks and 
processes a chunk in parallel based on Rayon.

Usage: 
  rsv.exe search [OPTIONS] <PATTERN> <FILENAME>
  rsv search PATTERN data.csv                     # regex search a PATTERN
  rsv search -f 0,1 PATTERN data.xlsx             # search the first two columns
  rsv search "^\d{4}-\d{2}-\d{2}$" data.csv       # search dates
  rsv search --export PATTERN data.csv            # export result
  rsv search PATTERN data.xlsx                    # search EXCEL file
  rsv search -S all PATTERN data.xlsx             # search all sheets of EXCEL

Arguments:
  <PATTERN>              Regex pattern to search
  <FILENAME>             File to open

Options:
  -s, --sep <SEP>        Separator [default: ,]
  -q, --quote <QUOTE>    Quote char [default: "]
      --no-header        Whether the file has a header
  -f, --filter <FILTER>  Columns to search [default: all]
  -c, --cols <COLS>      Columns to keep in output [default: all]
  -S, --sheet <SHEET>    Search the nth worksheet of EXCEL file [default: 0], can search all sheets with -S all
  -E, --export           Export to current-file-searched.csv
  -h, --help             Print help information

Column selection syntax:
  -c 0,1,2,5        -->  cols [0,1,2,5]
  -c 0-2,5          -->  same as cols [0,1,2,5]
  -c -1             -->  last column
  -c -2--1          -->  last two columns
"#;
409
/// Help text for the `rsv sort` command (description, usage examples, arguments, options).
pub const SORT_DESC: &str = r##"
Sort data by column(s). The sort is performed in-memory, so that large files
are not supported. The command supports sorting for at most two columns. 

The default is ascending sort. Descending sort can be specified with the 
-c 0D flag, where D stands for Descending Sort.  

The default is string sorting. Numeric sorting can be specified with the
-c 0N flag, where N stands for Numeric Sorting.

D (descending) and N (numeric) can be placed in arbitrary order, e.g., 
-c 0DN or -c 0ND.

Usage: 
  rsv sort [OPTIONS] [FILENAME]
  rsv sort -c 0 data.csv        # default to sort first column in ascending
  rsv sort -c 0D data.csv       # descending sort
  rsv sort -c 0DN data.csv      # sort as numeric values
  rsv sort -c 0DN,2N data.csv   # sort two columns
  rsv sort -E data.csv          # export result
  rsv sort data.xlsx            # sort EXCEL file

Arguments:
  [FILENAME]             File to open

Options:
  -s, --sep <SEP>        Separator [default: ,]
  -q, --quote <QUOTE>    Quote char [default: "]
      --no-header        Whether the file has a header
  -c, --cols <COLS>      Columns to sort by, e.g., -c 0, -c 0N, -c 0ND [default: 0]
  -S, --sheet <SHEET>    Get the nth worksheet of EXCEL file [default: 0]
  -E, --export           Export to a file named current-file-sorted.csv?
  -h, --help             Print help
"##;
444
/// Help text for the `rsv to` command (description, usage examples, arguments, options).
pub const TO_DESC: &str = r#"
Save data to disk, can be one of TXT, CSV, TSV, XLSX or XLS. 
Usually applied to save medium results in a command line chain.
When export to xlsx, column width in Excel is adjusted automatically.

Usage:
  rsv to [OPTIONS] <OUT> [FILENAME]
  rsv head data | rsv to out.csv
  rsv head data | rsv to out.txt
  rsv head data | rsv to out.tsv
  rsv head data | rsv to out.xlsx

Arguments:
  <OUT>                  Output file, a file name or a file format
  [FILENAME]             File to open

Options:
      --no-header        Whether the file has a header
  -s, --sep <SEP>        Input file Separator [default: ,]
  -q, --quote <QUOTE>    Quote char [default: "]
  -o, --outsep <OUTSEP>  Output file Separator [default: ,]
  -S, --sheet <SHEET>    Get the nth worksheet of EXCEL file [default: 0]
  -h, --help             Print help
"#;
469
/// Help text for the `rsv sample` command (description, usage examples, arguments, options).
// NOTE(review): the --export line names "current-file-searched.csv", which looks
// copied from the search command; left unchanged until the real file name is confirmed.
pub const SAMPLE_DESC: &str = r#"
Sampling data from file.

Usage: 
  rsv sample [OPTIONS] [FILENAME]
  rsv sample data.csv                  # default to sample 10 records
  rsv sample --no-header data.csv      # no-header
  rsv sample -n 20 data.csv            # pull more
  rsv sample -n 20 data.xlsx           # EXCEL file
  rsv sample --seed 100 data.xlsx      # set a seed
  rsv sample --show-number data.xlsx   # show line numbers
  rsv sample --time-limit 2 data.xlsx  # set time limit to 2 seconds for large file
  rsv sample -n 20 --export data.xlsx  # data export

Arguments:
  [FILENAME]                     File to open

Options:
      --no-header                Whether the file has a header
  -S, --sheet <SHEET>            Get the nth worksheet of EXCEL file [default: 0]
  -n, --n <N>                    Sample size [default: 10]
      --seed <SEED>              Seed for random sampling
  -E, --export                   Export to a file named current-file-searched.csv
      --show-number              Show line number
  -t, --time-limit <TIME_LIMIT>  Time limit [default: infinity]
  -h, --help                     Print help
"#;
497
/// Help text for the `rsv unique` command (description, usage examples, arguments,
/// options, and the column selection syntax).
pub const UNIQUE_DESC: &str = r#"
Drop duplicates of data in a file.

Usage: 
  rsv unique [OPTIONS] [FILENAME]
  rsv unique data.csv               # default to drop duplicates on all columns,
                                    # default keep first record of duplicates
  rsv unique -c 0 data.csv          # drop on first column
  rsv unique -c 0,1 data.csv        # drop on first and second columns
  rsv unique --keep-last data.csv   # keep the last record when dropping
  rsv unique data.xlsx              # apply to EXCEL file
  rsv unique data.txt               # apply to TXT file

Arguments:
  [FILENAME]              File to open, could be EXCEL, TXT, CSV files

Options:
  -s, --sep <SEP>         Separator [default: ,]
  -q, --quote <QUOTE>     Quote char [default: "]
      --no-header         Whether the file has a header
  -c, --cols <COLS>       Columns to filter [default: all columns]
      --keep-last         Keep the last record of duplicates (default: keep the first)
  -S, --sheet <SHEET>     Get the nth worksheet of EXCEL file [default: 0]
  -E, --export            Export to drop-duplicates.csv
  -h, --help              Print help

Column selection syntax:
  -c 0,1,2,5       -->    cols [0,1,2,5]
  -c 0-2,5         -->    same as cols [0,1,2,5]
  -c -1            -->    last column
  -c -2--1         -->    last two columns
"#;