1use crate::error::{SortError, SortResult};
4use std::str::FromStr;
5
6#[derive(Debug, Clone)]
8pub struct SortKey {
9 pub start_field: usize,
11 pub start_char: Option<usize>,
13 pub end_field: Option<usize>,
15 pub end_char: Option<usize>,
17 pub options: SortKeyOptions,
19}
20
/// Per-key ordering flags.
///
/// Every flag defaults to `false`. The letter quoted on each field is the
/// key-option character that switches it on when a key definition is parsed.
#[derive(Clone, Debug, Default)]
pub struct SortKeyOptions {
    /// Set by the `n` key option.
    pub numeric: bool,
    /// Set by the `g` key option.
    pub general_numeric: bool,
    /// Set by the `M` key option.
    pub month: bool,
    /// Set by the `r` key option.
    pub reverse: bool,
    /// Set by the `f` key option.
    pub ignore_case: bool,
    /// Set by the `d` key option.
    pub dictionary_order: bool,
    /// Set by the `b` key option.
    pub ignore_leading_blanks: bool,
    /// Set by the `h` key option.
    pub human_numeric: bool,
    /// Set by the `V` key option.
    pub version: bool,
    /// Set by the `R` key option.
    pub random: bool,
}
35
36impl SortKey {
37 pub fn parse(keydef: &str) -> SortResult<Self> {
39 let parts: Vec<&str> = keydef.split(',').collect();
41 if parts.is_empty() || parts.len() > 2 {
42 return Err(SortError::parse_error(&format!(
43 "invalid key specification: {keydef}"
44 )));
45 }
46
47 let (start_field, start_char, start_opts) = Self::parse_field_spec(parts[0])?;
49
50 let (end_field, end_char, end_opts) = if parts.len() == 2 {
52 let (field, char_pos, opts) = Self::parse_field_spec(parts[1])?;
53 (Some(field), char_pos, opts)
54 } else {
55 (None, None, SortKeyOptions::default())
56 };
57
58 let mut options = start_opts;
60 if !options.numeric {
62 options.numeric = end_opts.numeric;
63 }
64 if !options.general_numeric {
65 options.general_numeric = end_opts.general_numeric;
66 }
67 if !options.month {
68 options.month = end_opts.month;
69 }
70 if !options.reverse {
71 options.reverse = end_opts.reverse;
72 }
73 if !options.ignore_case {
74 options.ignore_case = end_opts.ignore_case;
75 }
76 if !options.dictionary_order {
77 options.dictionary_order = end_opts.dictionary_order;
78 }
79 if !options.ignore_leading_blanks {
80 options.ignore_leading_blanks = end_opts.ignore_leading_blanks;
81 }
82 if !options.human_numeric {
83 options.human_numeric = end_opts.human_numeric;
84 }
85 if !options.version {
86 options.version = end_opts.version;
87 }
88 if !options.random {
89 options.random = end_opts.random;
90 }
91
92 Ok(Self {
93 start_field,
94 start_char,
95 end_field,
96 end_char,
97 options,
98 })
99 }
100
101 fn parse_field_spec(spec: &str) -> SortResult<(usize, Option<usize>, SortKeyOptions)> {
103 if spec.is_empty() {
104 return Err(SortError::parse_error("empty field specification"));
105 }
106
107 let mut chars = spec.chars().peekable();
108 let mut field_str = String::new();
109 let mut char_str = String::new();
110 let mut options = SortKeyOptions::default();
111
112 while let Some(&ch) = chars.peek() {
114 if ch.is_ascii_digit() {
115 field_str.push(ch);
116 chars.next();
117 } else {
118 break;
119 }
120 }
121
122 if field_str.is_empty() {
123 return Err(SortError::parse_error(&format!(
124 "invalid field specification: {spec}"
125 )));
126 }
127
128 let field = field_str
129 .parse::<usize>()
130 .map_err(|_| SortError::parse_error(&format!("invalid field number: {field_str}")))?;
131
132 if field == 0 {
133 return Err(SortError::parse_error("field numbers start at 1"));
134 }
135
136 let char_pos = if chars.peek() == Some(&'.') {
138 chars.next(); while let Some(&ch) = chars.peek() {
140 if ch.is_ascii_digit() {
141 char_str.push(ch);
142 chars.next();
143 } else {
144 break;
145 }
146 }
147
148 if char_str.is_empty() {
149 None
150 } else {
151 let pos = char_str.parse::<usize>().map_err(|_| {
152 SortError::parse_error(&format!("invalid character position: {char_str}"))
153 })?;
154 if pos == 0 {
155 return Err(SortError::parse_error("character positions start at 1"));
156 }
157 Some(pos)
158 }
159 } else {
160 None
161 };
162
163 for ch in chars {
165 match ch {
166 'n' => options.numeric = true,
167 'g' => options.general_numeric = true,
168 'M' => options.month = true,
169 'r' => options.reverse = true,
170 'f' => options.ignore_case = true,
171 'd' => options.dictionary_order = true,
172 'b' => options.ignore_leading_blanks = true,
173 'h' => options.human_numeric = true,
174 'V' => options.version = true,
175 'R' => options.random = true,
176 'i' => {} 'z' => {} _ => {
179 return Err(SortError::parse_error(&format!("invalid key option: {ch}")));
180 }
181 }
182 }
183
184 Ok((field, char_pos, options))
185 }
186}
187
188#[derive(Debug, Clone)]
190pub struct SortConfig {
191 pub mode: SortMode,
193 pub reverse: bool,
195 pub unique: bool,
197 pub stable: bool,
199 pub check: bool,
201 pub merge: bool,
203 pub zero_terminated: bool,
205 pub ignore_case: bool,
207 pub dictionary_order: bool,
209 pub ignore_leading_blanks: bool,
211 pub ignore_nonprinting: bool,
213 pub field_separator: Option<char>,
215 pub keys: Vec<SortKey>,
217 pub output_file: Option<String>,
219 pub buffer_size: Option<usize>,
221 pub parallel_threads: Option<usize>,
223 pub input_files: Vec<String>,
225 pub debug: bool,
227 pub compress_temp: bool,
229 pub temp_dir: Option<String>,
231}
232
/// Comparison mode used for a sort run.
///
/// The names in backticks are the canonical spellings produced by the
/// `Display` implementation and accepted by the `FromStr` implementation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SortMode {
    /// `lexicographic`
    Lexicographic,
    /// `numeric`
    Numeric,
    /// `general-numeric`
    GeneralNumeric,
    /// `human-numeric`
    HumanNumeric,
    /// `month`
    Month,
    /// `version`
    Version,
    /// `random`
    Random,
}
251
/// Direction of the final ordering, as reported by `SortConfig::sort_order`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SortOrder {
    /// Reported when the reverse flag is not set.
    Ascending,
    /// Reported when the reverse flag is set.
    Descending,
}
258
259impl Default for SortConfig {
260 fn default() -> Self {
261 Self {
262 mode: SortMode::Lexicographic,
263 reverse: false,
264 unique: false,
265 stable: false,
266 check: false,
267 merge: false,
268 zero_terminated: false,
269 ignore_case: false,
270 dictionary_order: false,
271 ignore_leading_blanks: false,
272 ignore_nonprinting: false,
273 field_separator: None,
274 keys: Vec::new(),
275 output_file: None,
276 buffer_size: None,
277 parallel_threads: None,
278 input_files: Vec::new(),
279 debug: false,
280 compress_temp: false,
281 temp_dir: None,
282 }
283 }
284}
285
286impl SortConfig {
287 pub fn new() -> Self {
289 Self::default()
290 }
291
292 pub fn with_mode(mut self, mode: SortMode) -> Self {
294 self.mode = mode;
295 self
296 }
297
298 pub fn with_reverse(mut self, reverse: bool) -> Self {
300 self.reverse = reverse;
301 self
302 }
303
304 pub fn with_unique(mut self, unique: bool) -> Self {
306 self.unique = unique;
307 self
308 }
309
310 pub fn with_stable(mut self, stable: bool) -> Self {
312 self.stable = stable;
313 self
314 }
315
316 pub fn with_check(mut self, check: bool) -> Self {
318 self.check = check;
319 self
320 }
321
322 pub fn with_merge(mut self, merge: bool) -> Self {
324 self.merge = merge;
325 self
326 }
327
328 pub fn with_zero_terminated(mut self, zero_terminated: bool) -> Self {
330 self.zero_terminated = zero_terminated;
331 self
332 }
333
334 pub fn with_field_separator(mut self, separator: Option<char>) -> Self {
336 self.field_separator = separator;
337 self
338 }
339
340 pub fn add_key(mut self, key: SortKey) -> Self {
342 self.keys.push(key);
343 self
344 }
345
346 pub fn with_output_file(mut self, output_file: Option<String>) -> Self {
348 self.output_file = output_file;
349 self
350 }
351
352 pub fn with_buffer_size(mut self, buffer_size: Option<usize>) -> Self {
354 self.buffer_size = buffer_size;
355 self
356 }
357
358 pub fn with_parallel_threads(mut self, threads: Option<usize>) -> Self {
360 self.parallel_threads = threads;
361 self
362 }
363
364 pub fn with_input_files(mut self, files: Vec<String>) -> Self {
366 self.input_files = files;
367 self
368 }
369
370 pub fn with_debug(mut self, debug: bool) -> Self {
372 self.debug = debug;
373 self
374 }
375
376 pub fn set_buffer_size_from_string(&mut self, size_str: &str) -> SortResult<()> {
378 let size = size_str
380 .parse::<usize>()
381 .map_err(|_| SortError::internal("Invalid buffer size"))?;
382 self.buffer_size = Some(size);
383 Ok(())
384 }
385
386 pub fn validate(&self) -> SortResult<()> {
388 if self.check && self.merge {
390 return Err(SortError::conflicting_options(
391 "cannot use both --check and --merge",
392 ));
393 }
394
395 if self.check && self.unique {
396 return Err(SortError::conflicting_options(
397 "--check is incompatible with --unique",
398 ));
399 }
400
401 if self.merge && self.unique {
402 }
404
405 if let Some(sep) = self.field_separator {
407 if sep == '\0' && !self.zero_terminated {
408 return Err(SortError::invalid_field_separator(
409 "null character separator requires -z option",
410 ));
411 }
412 }
413
414 if let Some(buffer_size) = self.buffer_size {
416 if buffer_size < 1024 {
417 return Err(SortError::invalid_buffer_size(
418 "buffer size too small (minimum 1KB)",
419 ));
420 }
421 const MAX_BUFFER_SIZE: u64 = 8 * 1024 * 1024 * 1024; if buffer_size as u64 > MAX_BUFFER_SIZE {
424 return Err(SortError::invalid_buffer_size(
425 "buffer size too large (maximum 8GB)",
426 ));
427 }
428 }
429
430 if let Some(threads) = self.parallel_threads {
432 if threads == 0 {
433 return Err(SortError::thread_pool_error(
434 "thread count must be positive",
435 ));
436 }
437 if threads > 1024 {
438 return Err(SortError::thread_pool_error(
439 "too many threads (maximum 1024)",
440 ));
441 }
442 }
443
444 Ok(())
445 }
446
447 pub fn sort_order(&self) -> SortOrder {
449 if self.reverse {
450 SortOrder::Descending
451 } else {
452 SortOrder::Ascending
453 }
454 }
455
456 pub fn random_sort(&self) -> bool {
458 matches!(self.mode, SortMode::Random)
459 }
460
461 pub fn numeric_sort(&self) -> bool {
463 matches!(
464 self.mode,
465 SortMode::Numeric | SortMode::GeneralNumeric | SortMode::HumanNumeric
466 )
467 }
468
469 pub fn has_typed_keys(&self) -> bool {
471 false }
473
474 pub fn input_file_count(&self) -> usize {
476 self.input_files.len()
477 }
478
479 pub fn reading_from_stdin(&self) -> bool {
481 self.input_files.is_empty() || (self.input_files.len() == 1 && self.input_files[0] == "-")
482 }
483
484 pub fn writing_to_stdout(&self) -> bool {
486 self.output_file.is_none()
487 }
488
489 pub fn effective_buffer_size(&self) -> usize {
491 self.buffer_size.unwrap_or(1024 * 1024) }
493
494 pub fn effective_thread_count(&self) -> usize {
496 self.parallel_threads.unwrap_or_else(num_cpus::get)
497 }
498
499 pub fn for_merge(&self) -> Self {
501 let mut config = self.clone();
502 config.merge = true;
503 config.check = false;
504 config
505 }
506
507 pub fn for_check(&self) -> Self {
509 let mut config = self.clone();
510 config.check = true;
511 config.merge = false;
512 config.unique = false; config
514 }
515}
516
517impl FromStr for SortMode {
518 type Err = SortError;
519
520 fn from_str(s: &str) -> Result<Self, Self::Err> {
521 match s.to_lowercase().as_str() {
522 "lexicographic" | "text" | "default" => Ok(SortMode::Lexicographic),
523 "numeric" | "n" => Ok(SortMode::Numeric),
524 "general-numeric" | "g" => Ok(SortMode::GeneralNumeric),
525 "human-numeric" | "h" => Ok(SortMode::HumanNumeric),
526 "month" | "m" => Ok(SortMode::Month),
527 "version" | "v" => Ok(SortMode::Version),
528 "random" | "r" => Ok(SortMode::Random),
529 _ => Err(SortError::parse_error(&format!("unknown sort mode: {s}"))),
530 }
531 }
532}
533
534impl std::fmt::Display for SortMode {
535 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
536 let name = match self {
537 SortMode::Lexicographic => "lexicographic",
538 SortMode::Numeric => "numeric",
539 SortMode::GeneralNumeric => "general-numeric",
540 SortMode::HumanNumeric => "human-numeric",
541 SortMode::Month => "month",
542 SortMode::Version => "version",
543 SortMode::Random => "random",
544 };
545 write!(f, "{name}")
546 }
547}
548
549pub struct SortConfigBuilder {
551 config: SortConfig,
552}
553
554impl SortConfigBuilder {
555 pub fn new() -> Self {
557 Self {
558 config: SortConfig::default(),
559 }
560 }
561
562 pub fn mode(mut self, mode: SortMode) -> Self {
564 self.config.mode = mode;
565 self
566 }
567
568 pub fn reverse(mut self) -> Self {
570 self.config.reverse = true;
571 self
572 }
573
574 pub fn unique(mut self) -> Self {
576 self.config.unique = true;
577 self
578 }
579
580 pub fn stable(mut self) -> Self {
582 self.config.stable = true;
583 self
584 }
585
586 pub fn check(mut self) -> Self {
588 self.config.check = true;
589 self
590 }
591
592 pub fn merge(mut self) -> Self {
594 self.config.merge = true;
595 self
596 }
597
598 pub fn zero_terminated(mut self) -> Self {
600 self.config.zero_terminated = true;
601 self
602 }
603
604 pub fn field_separator(mut self, separator: char) -> Self {
606 self.config.field_separator = Some(separator);
607 self
608 }
609
610 pub fn key(mut self, key: SortKey) -> Self {
612 self.config.keys.push(key);
613 self
614 }
615
616 pub fn output_file(mut self, file: String) -> Self {
618 self.config.output_file = Some(file);
619 self
620 }
621
622 pub fn buffer_size(mut self, size: usize) -> Self {
624 self.config.buffer_size = Some(size);
625 self
626 }
627
628 pub fn build(self) -> SortResult<SortConfig> {
630 self.config.validate()?;
631 Ok(self.config)
632 }
633}
634
635impl Default for SortConfigBuilder {
636 fn default() -> Self {
637 Self::new()
638 }
639}
640
641pub mod presets {
643 use super::*;
644
645 pub fn numeric() -> SortConfig {
647 SortConfig::new().with_mode(SortMode::Numeric)
648 }
649
650 pub fn version() -> SortConfig {
652 SortConfig::new().with_mode(SortMode::Version)
653 }
654
655 pub fn human_numeric() -> SortConfig {
657 SortConfig::new().with_mode(SortMode::HumanNumeric)
658 }
659
660 pub fn case_insensitive() -> SortConfig {
662 let mut config = SortConfig::new();
663 config.ignore_case = true;
664 config
665 }
666
667 pub fn unique() -> SortConfig {
669 SortConfig::new().with_unique(true)
670 }
671
672 pub fn reverse() -> SortConfig {
674 SortConfig::new().with_reverse(true)
675 }
676
677 pub fn stable() -> SortConfig {
679 SortConfig::new().with_stable(true)
680 }
681
682 pub fn merge() -> SortConfig {
684 SortConfig::new().with_merge(true)
685 }
686
687 pub fn check() -> SortConfig {
689 SortConfig::new().with_check(true)
690 }
691}
692
#[cfg(test)]
mod tests {
    use super::*;

    /// A defaulted config is lexicographic with reverse/unique/stable off.
    #[test]
    fn test_default_config() {
        let defaults = SortConfig::default();
        assert_eq!(defaults.mode, SortMode::Lexicographic);
        assert!(!defaults.reverse);
        assert!(!defaults.unique);
        assert!(!defaults.stable);
    }

    /// The builder carries every chained setting into the built config.
    #[test]
    fn test_config_builder() {
        let builder = SortConfigBuilder::new()
            .mode(SortMode::Numeric)
            .reverse()
            .unique();
        let built = builder.build().expect("Failed to build test config");

        assert_eq!(built.mode, SortMode::Numeric);
        assert!(built.reverse);
        assert!(built.unique);
    }

    /// Known mode names parse; unknown ones are rejected.
    #[test]
    fn test_sort_mode_from_str() {
        let numeric: SortMode = "numeric".parse().expect("Failed to parse numeric mode");
        assert_eq!(numeric, SortMode::Numeric);

        let version: SortMode = "version".parse().expect("Failed to parse version mode");
        assert_eq!(version, SortMode::Version);

        let invalid: Result<SortMode, _> = "invalid".parse();
        assert!(invalid.is_err());
    }

    /// `check` and `merge` together must fail validation.
    #[test]
    fn test_validate_conflicting_options() {
        let mut conflicting = SortConfig::default();
        conflicting.check = true;
        conflicting.merge = true;

        assert!(conflicting.validate().is_err());
    }

    /// The effective buffer size falls back to 1 MiB when unset.
    #[test]
    fn test_effective_buffer_size() {
        let unset = SortConfig::default();
        assert_eq!(unset.effective_buffer_size(), 1024 * 1024);

        let explicit = SortConfig::default().with_buffer_size(Some(2048));
        assert_eq!(explicit.effective_buffer_size(), 2048);
    }

    /// Each preset sets the one setting it is named after.
    #[test]
    fn test_presets() {
        assert_eq!(presets::numeric().mode, SortMode::Numeric);
        assert!(presets::reverse().reverse);
        assert!(presets::unique().unique);
    }

    /// No inputs, or a lone "-", means standard input; a real file does not.
    #[test]
    fn test_reading_from_stdin() {
        let no_inputs = SortConfig::default();
        assert!(no_inputs.reading_from_stdin());

        let dash_only = SortConfig::default().with_input_files(vec!["-".to_string()]);
        assert!(dash_only.reading_from_stdin());

        let real_file = SortConfig::default().with_input_files(vec!["file.txt".to_string()]);
        assert!(!real_file.reading_from_stdin());
    }
}