/// Help text for the `rsv size` subcommand, which shows the size of a file.
///
/// Kept as a raw string so the usage/arguments/options layout is stored verbatim.
1pub const SIZE_DESC: &str = r#"
2Show filesize of a file.
3
4Usage: rsv size [FILENAME]
5
6Arguments:
7 [FILENAME] File to open
8
9Options:
10 -h, --help Print help
11"#;
12
/// Help text for the `rsv count` subcommand.
///
/// Describes counting the lines of a file (or the files of a directory), with
/// usage examples and the `--no-header`, `--sheet` and `--help` options.
13pub const COUNT_DESC: &str = r#"
14Count the number of lines in a file. When supplemented with a directory,
15the command counts the number of files in the directory.
16
17Usage:
18 rsv.exe count [OPTIONS] <FILENAME>
19 rsv count data.csv # data
20 rsv count --no-header data.csv # no header
21 rsv count directory # directory
22 rsv count EXCEL.xlsx # EXCEL file
23
24Arguments:
25 <FILENAME> File to open
26
27Options:
28 --no-header Whether the file has a header
29 -S, --sheet <SHEET> Get the nth worksheet of Excel file [default: 0]
30 -h, --help Print help information (use `--help` for more detail)
31"#;
32
/// Help text for the `rsv headers` subcommand, which shows the headers of a file.
///
/// Lists usage examples plus the separator, quote, sheet and help options.
33pub const HEADER_DESC: &str = r#"
34Show file headers.
35
36Usage:
37 rsv.exe headers [OPTIONS] <FILENAME>
38 rsv headers data.csv
39 rsv headers -s \t data.csv # tab separator
40 rsv headers --sheet 0 data.xlsx # first sheet of Excel file
41 rsv headers --sheet 1 data.xlsx # second sheet
42
43Arguments:
44 <FILENAME> File to open, e.g., CSV, TXT, and EXCEL
45
46Options:
47 -s, --sep <SEP> Field separator [default: ,]
48 -q, --quote <QUOTE> Quote char [default: "]
49 -S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
50 -h, --help Print help information
51"#;
52
/// Help text for the `rsv head` subcommand, which shows the first n lines of a file.
///
/// The text also points out that the output can be piped into `rsv table`
/// to get an aligned table.
53pub const HEAD_DESC: &str = r#"
54Show head n lines of file. The result could be formatted in an aligned table
55by chaining with the <rsv table> command, e.g., rsv head data.csv | rsv table.
56
57Usage:
58 rsv.exe head [OPTIONS] <FILENAME>
59 rsv head data.csv # default to show head 10
60 rsv head -n 5 data.csv # show head 5
61 rsv head data.xlsx # EXCEL file, default to first sheet
62 rsv head --sheet 1 data.xlsx # second sheet
63
64Arguments:
65 <FILENAME> File to open, e.g., CSV, TXT, EXCEL or OTHERS
66
67Options:
68 --no-header Whether the file has a header
69 -n, --n <N> Number of records to show [default: 10]
70 -S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
71 -h, --help Print help information
72"#;
73
74pub const TAIL_DESC: &str = r#"
75Show tail n lines of file.
76
77Usage:
78 rsv tail [OPTIONS] [FILENAME]
79 rsv tail data.csv # default to show tail 10 records
80 rsv tail -n 5 data.csv # tail 5 records
81 rsv tail data.xlsx # EXCEL file, default to first sheet
82 rsv tail --sheet 1 data.xlsx # the second sheet
83
84Arguments:
85 [FILENAME] File to open
86
87Options:
88 --no-header Whether the file has a header
89 -n, --n <N> Number of records to show [default: 10]
90 -S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
91 -E, --export Export to a file named current-file-head.csv?
92 -h, --help Print help
93"#;
94
95pub const ESTIMATE_DESC: &str = r#"
96Fast estimate the number of lines in a file. The command first read 20000 lines
97(except the header) from the file, and then estimate average bytes of a line.
98The total number of lines of file is estimated according to file size and average
99bytes per line.
100
101Usage:
102 rsv.exe estimate <FILENAME>
103 rsv.exe estimate data.csv
104 rsv.exe estimate data.xlsx
105
106Arguments:
107 <FILENAME> File to open
108
109Options:
110 -S, --sheet <SHEET> Get the nth worksheet for an Excel file [default: 0]
111 -h, --help Print help information
112"#;
113
/// Help text for the `rsv clean` subcommand, which removes escape characters
/// (by default `"`) or other given strings from a file.
///
/// The text states that EXCEL files are not supported by this command.
114pub const CLEAN_DESC: &str = r#"
115Clean file with escape chars (e.g. "). Other special strings can also be cleaned.
116EXCEL files are not supported.
117
118Usage:
119 rsv.exe clean [OPTIONS] <FILENAME>
120 rsv clean data.csv # default to clean escape char "
121 rsv clean -e "content-to-delete" data.csv # clean str to empty
122 rsv clean -o new-file.csv data.csv # save to new-file.csv
123
124Arguments:
125 <FILENAME> File to open
126
127Options:
128 -o, --output <F> Output file, default to current-file-cleaned.csv
129 -e, --escape <ESCAPE> Escape char to clean [default: "]
130 -h, --help Print help information
131"#;
132
/// Help text for the `rsv flatten` subcommand, which prints records one by one
/// in a flattened layout.
///
/// Uses the `r##"..."##` raw-string form because the text contains the
/// sequence `"#` (in `"#"`), which would terminate an `r#"..."#` literal early.
133pub const FLATTEN_DESC: &str = r##"
134Prints flattened records to view them one by one. Records are separated
135by "#", which could be changed with the --delimiter flag. The command is
136similar to "xsv flatten" command and "\G" command in Mysql.
137
138Usage:
139 rsv.exe flatten [OPTIONS] <FILENAME>
140 rsv flatten data.csv # default to show first 5 records
141 rsv flatten -n 50 data.csv # show 50 records
142 rsv flatten --delimiter "--" data.csv # change delimiter to anything
143 rsv flatten data.xlsx # EXCEL, default to first sheet
144 rsv flatten --sheet 1 data.xlsx # EXCEL, second sheet
145
146Arguments:
147 <FILENAME> File to open, CSV, TXT, or EXCEL
148
149Options:
150 -s, --sep <SEP> Separator [default: ,]
151 -q, --quote <QUOTE> Quote Char [default: "]
152 --no-header Whether the file has a header
153 -d, --delimiter <DELIMITER> Line delimiter for printing [default: #]
154 -n, --n <N> Number of records to show, n=-1 to show all [default: 5]
155 -S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
156 -h, --help Print help information
157"##;
158
159pub const SLICE_DESC: &str = r#"
160Extract a slice of rows from CSV, TXT or EXCEL file. The range is specified
161as [start, end). If start is omitted , the slice starts from the first
162record of CSV. If end is omitted, the slice expands to the last record of CSV.
163
164Slice length can also be specified by the --len flag. When a length is
165specified, the end index is ignored.
166
167A single data record can by retrieved using the --index (shortened as -i) flag.
168when -i is specified, other flags (including start, end, length) are ignored.
169
170Usage:
171 rsv slice [Options] <FILENAME>
172 rsv slice -s 100 -e 150 data.csv # set start and end
173 rsv slice -s 100 -l 50 data.csv # set start and length
174 rsv slice -s 100 -l 50 --export data.csv # export to data-slice.csv
175 rsv slice -e 10 --export data.csv # set end and export data
176 rsv slice -i 9 data.csv # the 10th line only
177 rsv slice -i 9 data.xlsx # EXCEL file
178
179Arguments:
180 <FILENAME> File to open, including CSV, TXT, and EXCEL
181
182Options:
183 -s, --start <START> Start index of file [default: 0]
184 -e, --end <END> End index of file
185 -l, --length <LENGTH> Slice length
186 -i, --index <INDEX> Index for a single record
187 --no-header Whether the file has a header
188 -E, --export Export data to current-file-slice.csv
189 -S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
190 -h, --help Print help information
191"#;
192
193pub const FREQUENCY_DESC: &str = r#"
194Frequency table for one or multiple columns. Performance is optimized through parallel analyzing.
195
196Usage:
197 rsv.exe frequency [OPTIONS] <FILENAME>
198 rsv frequency data.csv # default to the first column, descending order
199 rsv frequency -c 0,1,2,5 data.csv # columns 0, 1, 2, and 5
200 rsv frequency -c 0-2,5 data.csv # same as above
201 rsv frequency -c 0-2 --export data.csv # export result to data-frequency.csv
202 rsv frequency -n 10 data.csv # keep top 10 frequent items
203 rsv frequency -a 10 data.csv # in ascending order
204 rsv frequency data.xlsx # EXCEL file
205
206Arguments:
207 <FILENAME> File to open
208
209Options:
210 -s, --sep <SEP> Separator [default: ,]
211 -q, --quote <QUOTE> Quote Char [default: "]
212 --no-header Whether the file has a header
213 -c, --cols <COLS> Columns to generate frequency table [default: 0]
214 -a, --ascending Show frequency table in ascending order
215 -E, --export Export result to a frequency.csv file
216 -n, --n <N> Top N to keep in frequency table [default: -1]
217 -S, --sheet <SHEET> Work for the nth worksheet of EXCEL file [default: 0]
218 -h, --help Print help information
219
220Column selection syntax:
221 -c 0,1,2,5 --> cols [0,1,2,5]
222 -c 0-2,5 --> same as cols [0,1,2,5]
223 -c -1 --> last column
224 -c -2--1 --> last two columns
225"#;
226
/// Help text for the `rsv split` subcommand, which splits a file either
/// sequentially (by `--size`) or by the values of a column (`--col`).
///
/// The text also documents the chunked, parallel processing strategy.
/// NOTE(review): `--size` is listed under both option groups below — confirm
/// whether the second listing (under the column-based options) is intended.
227pub const SPLIT_DESC: &str = r#"
228Split a large, unordered file into separate files in two ways:
2291. Sequentially: Use --size to specify the number of rows in a file;
2302. Based on a Column Value: Use --col or -c flag to specify a column.
231
232When splitting based on a column value, the output directory will be automatically
233created within the current data directory. Each small file will be named after the
234unique values found in the specified column.
235
236Following procedures are implemented to enhance performance:
2371. Data is analyzed in chunks (default size: 50MB) rather than line by line, minimizing
238the overhead of repeatedly opening and closing small files.
2392. Chunked data is processed in parallel using the Rayon library.
240
241By default, the tool splits the file based on the values of first column.
242
243Usage:
244 rsv.exe split [OPTIONS] <FILENAME>
245 rsv split data.csv # default to split based on first column
246 rsv split -c 1 data.csv # second column to split
247 rsv split -c 0 -s \t data.csv # first column, \t separator
248 rsv split data.xlsx # EXCEL file
249 rsv split --size 1000 data.xlsx # sequential split, 1000 records per file.
250
251Arguments:
252 <FILENAME> File to open
253
254Sequential Split options:
255 --size <SIZE> Number of records to write in each separate file
256
257Column-based Split Options:
258 -s, --sep <SEP> Separator [default: ,]
259 -q, --quote <QUOTE> Quote Char [default: "]
260 --no-header Whether the file has a header
261 -c, --col <COL> Column to split upon [default: 0]
262 -S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
263 --size <SIZE> Number of records to write in each separate file
264 -h, --help Print help information
265"#;
266
267pub const SELECT_DESC: &str = r#"
268Select rows and columns by filter. Row and column filter syntaxes
269are listed as below. Output can be exported with the --export flag.
270
271Usage:
272 rsv.exe select [OPTIONS] <FILENAME>
273 rsv select -f 0=a,b,c data.csv # filter first column valued as a, b, or c
274 rsv select -f "0N>10&1=c" data.csv # first column > 10 numerically, AND the second column equals c
275 rsv select -f 0!= --export data.csv # first column is not empty, and export result
276 rsv select -f 0=a,b data.xlsx # apply to EXCEL file
277 rsv select -f "0>@1-10*(@3+2)" data.csv # math calculation based on column 2 (@1) and column 4 (@3)
278 # left column is treated as numeric automatically
279
280Arguments:
281 <FILENAME> File to open
282
283Options:
284 -s, --sep <SEP> Separator [default: ,]
285 -q, --quote <QUOTE> Quote Char [default: "]
286 --no-header Whether the file has a header
287 -c, --cols <COLS> Columns to select, see column select syntax below; Default to select ALL
288 -f, --filter <FILTER> Row filter, see row filter syntax below; Default to NONE
289 -E, --export Export results to a current-file-selected.csv file
290 -S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
291 -h, --help Print help information
292
293Filter syntax, support =, !=, >, >=, <, <= and &:
294 -f 0=a,b,c --> first column is a, b, or c
295 -f 0N=1,2 --> first column numerically equals to 1 or 2
296 -f 0!= --> first column is not empty
297 -f "0>=2022-01-21" --> first column equal to or bigger than 2022-01-21, lexicographically
298 -f "0N>10" --> first column > 10 numerically
299 -f "0N>10&2=pattern" --> first column > 10 numerically, AND the third column equals to <pattern>
300
301Math express syntax (support +, -, *, /, %, ^, (, )):
302-f "0>@1 + 1" --> first column > second column plus one
303-f "0>=(@1+1)/(2^2)" --> first column >= (second column + 1) / (2 ^ 2)
304
305Column selection syntax:
306 -c 0,1,2,5 --> cols [0,1,2,5]
307 -c 0-2,5 --> same as cols [0,1,2,5]
308 -c -1 --> last column
309 -c -2--1 --> last two columns
310"#;
311
312pub const STATS_DESC: &str = r#"
313Statistics for columns, including min, max, mean, unique, null. Within the command,
314columns are regarded as either an Int, Float or String. When the column is String,
315min, max, mean are ignored. When the column is Float, the unique stat is ignored.
316The command processes data in batches and in parallel.
317
318Usage:
319 rsv stats [OPTIONS] <FILENAME>
320 rsv stats data.csv # all columns
321 rsv stats -c 0,1 data.csv # first two columns
322 rsv stats -c 0,1 --export data.csv # export statistics to data-stats.csv
323 rsv stats -c 0,1 --export data.xlsx # EXCEL file
324
325Arguments:
326 <FILENAME> File to open, including CSV, TXT, and EXCEL
327
328Options:
329 -s, --sep <SEP> Separator [default: ,]
330 --no-header Whether the file has a header
331 -c, --cols <COLS> Columns to generate statistics, Default to select all
332 -E, --export Export results to a file named current-file-selected.csv
333 -S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
334 -h, --help Print help information
335
336Column selection syntax:
337 -c 0,1,2,5 --> cols [0,1,2,5]
338 -c 0-2,5 --> same as cols [0,1,2,5]
339 -c -1 --> last column
340 -c -2--1 --> last two columns
341"#;
342
/// Help text for the `rsv excel2csv` subcommand, which converts an EXCEL
/// worksheet to CSV.
///
/// Lists usage examples and the sheet, separator, quote and help options.
343pub const EXCEL2CSV_DESC: &str = r#"
344Convert EXCEL to CSV.
345
346Usage:
347 rsv.exe excel2csv [OPTIONS] <FILENAME>
348 rsv excel2csv data.xlsx # default to first sheet
349 rsv excel2csv --sheet 1 data.xlsx # second sheet
350
351Arguments:
352 <FILENAME> File to open
353
354Options:
355 -S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
356 -s, --sep <SEP> Separator [default: ,]
357 -q, --quote <QUOTE> Quote char [default: "]
358 -h, --help Print help information
359"#;
360
/// Help text for the `rsv table` subcommand, which shows data as an aligned table.
///
/// The usage examples show it at the end of a pipe (e.g. after `rsv head`);
/// the usage line lists no FILENAME argument.
361pub const TABLE_DESC: &str = r#"
362Show data in an aligned table.
363
364Usage:
365 rsv.exe table [OPTIONS]
366 rsv head data.csv | rsv table # convert result to an aligned table
367 rsv slice -s 10 -e 15 data.csv | rsv table # convert result to an aligned table
368
369Options:
370 -s, --sep <SEP> Separator [default: ,]
371 -q, --quote <QUOTE> Quote char [default: "]
372 -h, --help Print help information
373"#;
374
/// Help text for the `rsv search` subcommand, which searches a file with a
/// regular expression.
///
/// The text links to the `regex` crate syntax page and documents both the
/// columns to search (`-f`) and the columns to keep in the output (`-c`).
375pub const SEARCH_DESC: &str = r#"
376Search file with regexes. Regex syntax is to be found at:
377https://docs.rs/regex/latest/regex/#syntax. The command reads file in chunks and
378processes a chunk in parallel based on Rayon.
379
380Usage:
381 rsv.exe search [OPTIONS] <PATTERN> <FILENAME>
382 rsv search PATTERN data.csv # regex search a PATTERN
383 rsv search -f 0,1 PATTERN data.xlsx # search the first two columns
384 rsv search "^\d{4}-\d{2}-\d{2}$" data.csv # search dates
385 rsv search --export PATTERN data.csv # export result
386 rsv search PATTERN data.xlsx # search EXCEL file
387 rsv search -S all PATTERN data.xlsx # search all sheets of EXCEL
388
389Arguments:
390 <PATTERN> Regex pattern to search
391 <FILENAME> File to open
392
393Options:
394 -s, --sep <SEP> Separator [default: ,]
395 -q, --quote <QUOTE> Quote char [default: "]
396 --no-header Whether the file has a header
397 -f, --filter <FILTER> Columns to search [default: all]
398 -c, --cols <COLS> Columns to keep in output [default: all]
399 -S, --sheet <SHEET> Search the nth worksheet of EXCEL file [default: 0], can search all sheets with -S all
400 -E, --export Export to current-file-searched.csv
401 -h, --help Print help information
402
403Column selection syntax:
404 -c 0,1,2,5 --> cols [0,1,2,5]
405 -c 0-2,5 --> same as cols [0,1,2,5]
406 -c -1 --> last column
407 -c -2--1 --> last two columns
408"#;
409
/// Help text for the `rsv sort` subcommand, which sorts data by up to two columns.
///
/// The text documents the per-column suffixes: `D` for descending and `N` for
/// numeric sorting, combinable in either order (e.g. `0DN` or `0ND`).
410pub const SORT_DESC: &str = r##"
411Sort data by column(s). The sort is performed in-memory, so that large files
412are not supported. The command supports sorting for at most two columns.
413
414The default is ascending sort. Descending sort can be specified with the
415-c 0D flag, where D stands for Descending Sort.
416
417The default is string sorting. Numeric sorting can be specified with the
418-c 0N flag, where N stands for Numeric Sorting.
419
420D (descending) and N (numeric) can be placed in arbitrary order, e.g.,
421-c 0DN or -c 0ND.
422
423Usage:
424 rsv sort [OPTIONS] [FILENAME]
425 rsv sort -c 0 data.csv # default to sort first column in ascending
426 rsv sort -c 0D data.csv # descending sort
427 rsv sort -c 0DN data.csv # sort as numeric values
428 rsv sort -c 0DN,2N data.csv # sort two columns
429 rsv sort -E data.csv # export result
430 rsv sort data.xlsx # sort EXCEL file
431
432Arguments:
433 [FILENAME] File to open
434
435Options:
436 -s, --sep <SEP> Separator [default: ,]
437 -q, --quote <QUOTE> Quote char [default: "]
438 --no-header Whether the file has a header
439 -c, --cols <COLS> Columns to sort by, e.g., -c 0, -c 0N, -c 0ND [default: 0]
440 -S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
441 -E, --export Export to a file named current-file-sorted.csv?
442 -h, --help Print help
443"##;
444
445pub const TO_DESC: &str = r#"
446Save data to disk, can be one of TXT, CSV, TSV, XLSX or XLS.
447Usually applied to save medium results in a command line chain.
448When export to xlsx, column width in Excel is adjusted automatically.
449
450Usage:
451 rsv to [OPTIONS] <OUT> [FILENAME]
452 rsv head data | rsv to out.csv
453 rsv head data | rsv to out.txt
454 rsv head data | rsv to out.tsv
455 rsv head data | rsv to out.xlsx
456
457Arguments:
458 <OUT> Output file, a file name or a file format
459 [FILENAME] File to open
460
461Options:
462 --no-header Whether the file has a header
463 -s, --sep <SEP> Input file Separator [default: ,]
464 -q, --quote <QUOTE> Quote char [default: "]
465 -o, --outsep <OUTSEP> Output file Separator [default: ,]
466 -S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
467 -h, --help Print help
468"#;
469
470pub const SAMPLE_DESC: &str = r#"
471Sampling data from file.
472
473Usage:
474 rsv sample [OPTIONS] [FILENAME]
475 rsv sample data.csv # default to sample 10 records
476 rsv sample --no-header data.csv # no-header
477 rsv sample -n 20 data.csv # pull more
478 rsv sample -n 20 data.xlsx # EXCEL file
479 rsv sample --seed 100 data.xlsx # set a seed
480 rsv sample --show-number data.xlsx # show line numbers
481 rsv sample --time-limit 2 data.xlsx # set time limit to 2 seconds for large file
482 rsv sample -n 20 --export data.xlsx # data export
483
484Arguments:
485 [FILENAME] File to open
486
487Options:
488 --no-header Whether the file has a header
489 -S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
490 -n, --n <N> Sample size [default: 10]
491 --seed <SEED> Get the nth worksheet of EXCEL file
492 -E, --export Export to a file named current-file-searched.csv
493 --show-number Show line number
494 -t, --time-limit <TIME_LIMIT> Time limit [default: infinity]
495 -h, --help Print help
496"#;
497
498pub const UNIQUE_DESC: &str = r#"
499Drop duplicates of data in a file.
500
501Usage:
502 rsv unique [OPTIONS] [FILENAME]
503 rsv unique data.csv # default to drop duplicates on all columns,
504 # default keep first record of duplicates
505 rsv unique -c 0 data.csv # drop on first column
506 rsv unique -c 0,1 data.csv # drop on first and second columns
507 rsv unique --keep-last data.csv # keep the last record when dropping
508 rsv unique data.xlsx # apply to EXCEL file
509 rsv unique data.txt # apply to TXT file
510
511Arguments:
512 [FILENAME] File to open, could be EXCEL, TXT, CSV files
513
514Options:
515 -s, --sep <SEP> Separator [default: ,]
516 -q, --quote <QUOTE> Quote char [default: "]
517 --no-header Whether the file has a header
518 -c, --cols <COLS> Columns to filter [default: all columns]
519 --keep-last keep first or last
520 -S, --sheet <SHEET> Get the nth worksheet of EXCEL file [default: 0]
521 -E, --export Export to drop-duplicates.csv
522 -h, --help Print help
523
524Column selection syntax:
525 -c 0,1,2,5 --> cols [0,1,2,5]
526 -c 0-2,5 --> same as cols [0,1,2,5]
527 -c -1 --> last column
528 -c -2--1 --> last two columns
529"#;