gnu_sort/
config.rs

1//! Configuration management for sort operations
2
3use crate::error::{SortError, SortResult};
4use std::str::FromStr;
5
6/// Sort key specification for field-based sorting
7#[derive(Debug, Clone)]
8pub struct SortKey {
9    /// Starting field number (1-based)
10    pub start_field: usize,
11    /// Starting character position within field (1-based, optional)
12    pub start_char: Option<usize>,
13    /// Ending field number (1-based, optional)
14    pub end_field: Option<usize>,
15    /// Ending character position within field (1-based, optional)
16    pub end_char: Option<usize>,
17    /// Sort options specific to this key
18    pub options: SortKeyOptions,
19}
20
/// Flags attached to a single sort key.
///
/// Every flag is `false` in the `Default` value. The letter noted on each
/// field is the key-option character that switches it on when a key
/// definition is parsed.
#[derive(Clone, Debug, Default)]
pub struct SortKeyOptions {
    /// Set by the `n` key option.
    pub numeric: bool,
    /// Set by the `g` key option.
    pub general_numeric: bool,
    /// Set by the `M` key option.
    pub month: bool,
    /// Set by the `r` key option.
    pub reverse: bool,
    /// Set by the `f` key option.
    pub ignore_case: bool,
    /// Set by the `d` key option.
    pub dictionary_order: bool,
    /// Set by the `b` key option.
    pub ignore_leading_blanks: bool,
    /// Set by the `h` key option.
    pub human_numeric: bool,
    /// Set by the `V` key option.
    pub version: bool,
    /// Set by the `R` key option.
    pub random: bool,
}
35
36impl SortKey {
37    /// Parse a sort key from a string like "2,4" or "1.3,1.5" or "2nr"
38    pub fn parse(keydef: &str) -> SortResult<Self> {
39        // Split by comma to get start and optional end
40        let parts: Vec<&str> = keydef.split(',').collect();
41        if parts.is_empty() || parts.len() > 2 {
42            return Err(SortError::parse_error(&format!(
43                "invalid key specification: {keydef}"
44            )));
45        }
46
47        // Parse start position and options
48        let (start_field, start_char, start_opts) = Self::parse_field_spec(parts[0])?;
49
50        // Parse end position if present
51        let (end_field, end_char, end_opts) = if parts.len() == 2 {
52            let (field, char_pos, opts) = Self::parse_field_spec(parts[1])?;
53            (Some(field), char_pos, opts)
54        } else {
55            (None, None, SortKeyOptions::default())
56        };
57
58        // Merge options (start options take precedence)
59        let mut options = start_opts;
60        // Apply end options only if they're set and start options aren't
61        if !options.numeric {
62            options.numeric = end_opts.numeric;
63        }
64        if !options.general_numeric {
65            options.general_numeric = end_opts.general_numeric;
66        }
67        if !options.month {
68            options.month = end_opts.month;
69        }
70        if !options.reverse {
71            options.reverse = end_opts.reverse;
72        }
73        if !options.ignore_case {
74            options.ignore_case = end_opts.ignore_case;
75        }
76        if !options.dictionary_order {
77            options.dictionary_order = end_opts.dictionary_order;
78        }
79        if !options.ignore_leading_blanks {
80            options.ignore_leading_blanks = end_opts.ignore_leading_blanks;
81        }
82        if !options.human_numeric {
83            options.human_numeric = end_opts.human_numeric;
84        }
85        if !options.version {
86            options.version = end_opts.version;
87        }
88        if !options.random {
89            options.random = end_opts.random;
90        }
91
92        Ok(Self {
93            start_field,
94            start_char,
95            end_field,
96            end_char,
97            options,
98        })
99    }
100
101    /// Parse a field specification like "2" or "2.3" or "2nr"
102    fn parse_field_spec(spec: &str) -> SortResult<(usize, Option<usize>, SortKeyOptions)> {
103        if spec.is_empty() {
104            return Err(SortError::parse_error("empty field specification"));
105        }
106
107        let mut chars = spec.chars().peekable();
108        let mut field_str = String::new();
109        let mut char_str = String::new();
110        let mut options = SortKeyOptions::default();
111
112        // Parse field number
113        while let Some(&ch) = chars.peek() {
114            if ch.is_ascii_digit() {
115                field_str.push(ch);
116                chars.next();
117            } else {
118                break;
119            }
120        }
121
122        if field_str.is_empty() {
123            return Err(SortError::parse_error(&format!(
124                "invalid field specification: {spec}"
125            )));
126        }
127
128        let field = field_str
129            .parse::<usize>()
130            .map_err(|_| SortError::parse_error(&format!("invalid field number: {field_str}")))?;
131
132        if field == 0 {
133            return Err(SortError::parse_error("field numbers start at 1"));
134        }
135
136        // Check for character position (after a dot)
137        let char_pos = if chars.peek() == Some(&'.') {
138            chars.next(); // consume the dot
139            while let Some(&ch) = chars.peek() {
140                if ch.is_ascii_digit() {
141                    char_str.push(ch);
142                    chars.next();
143                } else {
144                    break;
145                }
146            }
147
148            if char_str.is_empty() {
149                None
150            } else {
151                let pos = char_str.parse::<usize>().map_err(|_| {
152                    SortError::parse_error(&format!("invalid character position: {char_str}"))
153                })?;
154                if pos == 0 {
155                    return Err(SortError::parse_error("character positions start at 1"));
156                }
157                Some(pos)
158            }
159        } else {
160            None
161        };
162
163        // Parse options (single letters after the field spec)
164        for ch in chars {
165            match ch {
166                'n' => options.numeric = true,
167                'g' => options.general_numeric = true,
168                'M' => options.month = true,
169                'r' => options.reverse = true,
170                'f' => options.ignore_case = true,
171                'd' => options.dictionary_order = true,
172                'b' => options.ignore_leading_blanks = true,
173                'h' => options.human_numeric = true,
174                'V' => options.version = true,
175                'R' => options.random = true,
176                'i' => {} // ignore non-printing - not fully implemented
177                'z' => {} // zero-terminated - handled globally
178                _ => {
179                    return Err(SortError::parse_error(&format!("invalid key option: {ch}")));
180                }
181            }
182        }
183
184        Ok((field, char_pos, options))
185    }
186}
187
188/// Main configuration structure for sort operations
189#[derive(Debug, Clone)]
190pub struct SortConfig {
191    /// Primary sort mode
192    pub mode: SortMode,
193    /// Sort order (normal or reverse)
194    pub reverse: bool,
195    /// Output only unique lines
196    pub unique: bool,
197    /// Use stable sort algorithm
198    pub stable: bool,
199    /// Check if input is already sorted
200    pub check: bool,
201    /// Merge already sorted files
202    pub merge: bool,
203    /// Use zero bytes as line terminators instead of newlines
204    pub zero_terminated: bool,
205    /// Ignore case differences
206    pub ignore_case: bool,
207    /// Consider only dictionary order (alphanumeric and blanks)
208    pub dictionary_order: bool,
209    /// Ignore leading blanks
210    pub ignore_leading_blanks: bool,
211    /// Ignore non-printing characters
212    pub ignore_nonprinting: bool,
213    /// Field separator character
214    pub field_separator: Option<char>,
215    /// Sort keys (field specifications)
216    pub keys: Vec<SortKey>,
217    /// Output file path
218    pub output_file: Option<String>,
219    /// Buffer size for I/O operations
220    pub buffer_size: Option<usize>,
221    /// Number of parallel threads to use
222    pub parallel_threads: Option<usize>,
223    /// Files to read from (if not specified, use stdin)
224    pub input_files: Vec<String>,
225    /// Debug mode (for troubleshooting)
226    pub debug: bool,
227    /// Compress temporary files
228    pub compress_temp: bool,
229    /// Temporary directory for external sorting
230    pub temp_dir: Option<String>,
231}
232
/// The available comparison modes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SortMode {
    /// Standard lexicographic comparison.
    Lexicographic,
    /// Numeric comparison (integers).
    Numeric,
    /// General numeric comparison (floating point).
    GeneralNumeric,
    /// Human-readable numeric comparison (suffixes such as K, M, G).
    HumanNumeric,
    /// Comparison by month name.
    Month,
    /// Comparison by version number.
    Version,
    /// Random ordering that still groups identical keys.
    Random,
}
251
/// Direction in which sorted output is arranged.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SortOrder {
    /// Normal (non-reversed) order.
    Ascending,
    /// Reversed order.
    Descending,
}
258
259impl Default for SortConfig {
260    fn default() -> Self {
261        Self {
262            mode: SortMode::Lexicographic,
263            reverse: false,
264            unique: false,
265            stable: false,
266            check: false,
267            merge: false,
268            zero_terminated: false,
269            ignore_case: false,
270            dictionary_order: false,
271            ignore_leading_blanks: false,
272            ignore_nonprinting: false,
273            field_separator: None,
274            keys: Vec::new(),
275            output_file: None,
276            buffer_size: None,
277            parallel_threads: None,
278            input_files: Vec::new(),
279            debug: false,
280            compress_temp: false,
281            temp_dir: None,
282        }
283    }
284}
285
286impl SortConfig {
287    /// Create a new configuration with default values
288    pub fn new() -> Self {
289        Self::default()
290    }
291
292    /// Set the sort mode
293    pub fn with_mode(mut self, mode: SortMode) -> Self {
294        self.mode = mode;
295        self
296    }
297
298    /// Enable reverse sorting
299    pub fn with_reverse(mut self, reverse: bool) -> Self {
300        self.reverse = reverse;
301        self
302    }
303
304    /// Enable unique output
305    pub fn with_unique(mut self, unique: bool) -> Self {
306        self.unique = unique;
307        self
308    }
309
310    /// Enable stable sorting
311    pub fn with_stable(mut self, stable: bool) -> Self {
312        self.stable = stable;
313        self
314    }
315
316    /// Enable check mode
317    pub fn with_check(mut self, check: bool) -> Self {
318        self.check = check;
319        self
320    }
321
322    /// Enable merge mode
323    pub fn with_merge(mut self, merge: bool) -> Self {
324        self.merge = merge;
325        self
326    }
327
328    /// Enable zero-terminated lines
329    pub fn with_zero_terminated(mut self, zero_terminated: bool) -> Self {
330        self.zero_terminated = zero_terminated;
331        self
332    }
333
334    /// Set field separator
335    pub fn with_field_separator(mut self, separator: Option<char>) -> Self {
336        self.field_separator = separator;
337        self
338    }
339
340    /// Add a sort key
341    pub fn add_key(mut self, key: SortKey) -> Self {
342        self.keys.push(key);
343        self
344    }
345
346    /// Set output file
347    pub fn with_output_file(mut self, output_file: Option<String>) -> Self {
348        self.output_file = output_file;
349        self
350    }
351
352    /// Set buffer size
353    pub fn with_buffer_size(mut self, buffer_size: Option<usize>) -> Self {
354        self.buffer_size = buffer_size;
355        self
356    }
357
358    /// Set parallel threads
359    pub fn with_parallel_threads(mut self, threads: Option<usize>) -> Self {
360        self.parallel_threads = threads;
361        self
362    }
363
364    /// Set input files
365    pub fn with_input_files(mut self, files: Vec<String>) -> Self {
366        self.input_files = files;
367        self
368    }
369
370    /// Enable debug mode
371    pub fn with_debug(mut self, debug: bool) -> Self {
372        self.debug = debug;
373        self
374    }
375
376    /// Parse buffer size from string (simplified)
377    pub fn set_buffer_size_from_string(&mut self, size_str: &str) -> SortResult<()> {
378        // Simple parsing for now - just parse as number
379        let size = size_str
380            .parse::<usize>()
381            .map_err(|_| SortError::internal("Invalid buffer size"))?;
382        self.buffer_size = Some(size);
383        Ok(())
384    }
385
386    /// Validate configuration for consistency
387    pub fn validate(&self) -> SortResult<()> {
388        // Check for conflicting modes
389        if self.check && self.merge {
390            return Err(SortError::conflicting_options(
391                "cannot use both --check and --merge",
392            ));
393        }
394
395        if self.check && self.unique {
396            return Err(SortError::conflicting_options(
397                "--check is incompatible with --unique",
398            ));
399        }
400
401        if self.merge && self.unique {
402            // This is actually allowed, but warn about performance implications
403        }
404
405        // Validate field separator
406        if let Some(sep) = self.field_separator {
407            if sep == '\0' && !self.zero_terminated {
408                return Err(SortError::invalid_field_separator(
409                    "null character separator requires -z option",
410                ));
411            }
412        }
413
414        // Check for reasonable buffer size
415        if let Some(buffer_size) = self.buffer_size {
416            if buffer_size < 1024 {
417                return Err(SortError::invalid_buffer_size(
418                    "buffer size too small (minimum 1KB)",
419                ));
420            }
421            // Use u64 to avoid overflow on 32-bit systems
422            const MAX_BUFFER_SIZE: u64 = 8 * 1024 * 1024 * 1024; // 8GB
423            if buffer_size as u64 > MAX_BUFFER_SIZE {
424                return Err(SortError::invalid_buffer_size(
425                    "buffer size too large (maximum 8GB)",
426                ));
427            }
428        }
429
430        // Validate thread count
431        if let Some(threads) = self.parallel_threads {
432            if threads == 0 {
433                return Err(SortError::thread_pool_error(
434                    "thread count must be positive",
435                ));
436            }
437            if threads > 1024 {
438                return Err(SortError::thread_pool_error(
439                    "too many threads (maximum 1024)",
440                ));
441            }
442        }
443
444        Ok(())
445    }
446
447    /// Get the effective sort order
448    pub fn sort_order(&self) -> SortOrder {
449        if self.reverse {
450            SortOrder::Descending
451        } else {
452            SortOrder::Ascending
453        }
454    }
455
456    /// Check if random sort is enabled
457    pub fn random_sort(&self) -> bool {
458        matches!(self.mode, SortMode::Random)
459    }
460
461    /// Check if numeric sort mode is enabled
462    pub fn numeric_sort(&self) -> bool {
463        matches!(
464            self.mode,
465            SortMode::Numeric | SortMode::GeneralNumeric | SortMode::HumanNumeric
466        )
467    }
468
469    /// Check if any keys have specific sort types
470    pub fn has_typed_keys(&self) -> bool {
471        false // Simplified - no complex key checking
472    }
473
474    /// Get the number of input files (0 means stdin)
475    pub fn input_file_count(&self) -> usize {
476        self.input_files.len()
477    }
478
479    /// Check if reading from stdin
480    pub fn reading_from_stdin(&self) -> bool {
481        self.input_files.is_empty() || (self.input_files.len() == 1 && self.input_files[0] == "-")
482    }
483
484    /// Check if writing to stdout
485    pub fn writing_to_stdout(&self) -> bool {
486        self.output_file.is_none()
487    }
488
489    /// Get effective buffer size (with default)
490    pub fn effective_buffer_size(&self) -> usize {
491        self.buffer_size.unwrap_or(1024 * 1024) // 1MB default
492    }
493
494    /// Get effective thread count
495    pub fn effective_thread_count(&self) -> usize {
496        self.parallel_threads.unwrap_or_else(num_cpus::get)
497    }
498
499    /// Create a configuration for merge operations
500    pub fn for_merge(&self) -> Self {
501        let mut config = self.clone();
502        config.merge = true;
503        config.check = false;
504        config
505    }
506
507    /// Create a configuration for check operations
508    pub fn for_check(&self) -> Self {
509        let mut config = self.clone();
510        config.check = true;
511        config.merge = false;
512        config.unique = false; // Not applicable for check
513        config
514    }
515}
516
517impl FromStr for SortMode {
518    type Err = SortError;
519
520    fn from_str(s: &str) -> Result<Self, Self::Err> {
521        match s.to_lowercase().as_str() {
522            "lexicographic" | "text" | "default" => Ok(SortMode::Lexicographic),
523            "numeric" | "n" => Ok(SortMode::Numeric),
524            "general-numeric" | "g" => Ok(SortMode::GeneralNumeric),
525            "human-numeric" | "h" => Ok(SortMode::HumanNumeric),
526            "month" | "m" => Ok(SortMode::Month),
527            "version" | "v" => Ok(SortMode::Version),
528            "random" | "r" => Ok(SortMode::Random),
529            _ => Err(SortError::parse_error(&format!("unknown sort mode: {s}"))),
530        }
531    }
532}
533
534impl std::fmt::Display for SortMode {
535    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
536        let name = match self {
537            SortMode::Lexicographic => "lexicographic",
538            SortMode::Numeric => "numeric",
539            SortMode::GeneralNumeric => "general-numeric",
540            SortMode::HumanNumeric => "human-numeric",
541            SortMode::Month => "month",
542            SortMode::Version => "version",
543            SortMode::Random => "random",
544        };
545        write!(f, "{name}")
546    }
547}
548
549/// Builder pattern for creating configurations
550pub struct SortConfigBuilder {
551    config: SortConfig,
552}
553
554impl SortConfigBuilder {
555    /// Start building a new configuration
556    pub fn new() -> Self {
557        Self {
558            config: SortConfig::default(),
559        }
560    }
561
562    /// Set sort mode
563    pub fn mode(mut self, mode: SortMode) -> Self {
564        self.config.mode = mode;
565        self
566    }
567
568    /// Enable reverse sorting
569    pub fn reverse(mut self) -> Self {
570        self.config.reverse = true;
571        self
572    }
573
574    /// Enable unique output
575    pub fn unique(mut self) -> Self {
576        self.config.unique = true;
577        self
578    }
579
580    /// Enable stable sorting
581    pub fn stable(mut self) -> Self {
582        self.config.stable = true;
583        self
584    }
585
586    /// Enable check mode
587    pub fn check(mut self) -> Self {
588        self.config.check = true;
589        self
590    }
591
592    /// Enable merge mode
593    pub fn merge(mut self) -> Self {
594        self.config.merge = true;
595        self
596    }
597
598    /// Enable zero-terminated lines
599    pub fn zero_terminated(mut self) -> Self {
600        self.config.zero_terminated = true;
601        self
602    }
603
604    /// Set field separator
605    pub fn field_separator(mut self, separator: char) -> Self {
606        self.config.field_separator = Some(separator);
607        self
608    }
609
610    /// Add a sort key
611    pub fn key(mut self, key: SortKey) -> Self {
612        self.config.keys.push(key);
613        self
614    }
615
616    /// Set output file
617    pub fn output_file(mut self, file: String) -> Self {
618        self.config.output_file = Some(file);
619        self
620    }
621
622    /// Set buffer size
623    pub fn buffer_size(mut self, size: usize) -> Self {
624        self.config.buffer_size = Some(size);
625        self
626    }
627
628    /// Build the final configuration
629    pub fn build(self) -> SortResult<SortConfig> {
630        self.config.validate()?;
631        Ok(self.config)
632    }
633}
634
635impl Default for SortConfigBuilder {
636    fn default() -> Self {
637        Self::new()
638    }
639}
640
641/// Preset configurations for common use cases
642pub mod presets {
643    use super::*;
644
645    /// Configuration for numeric sorting
646    pub fn numeric() -> SortConfig {
647        SortConfig::new().with_mode(SortMode::Numeric)
648    }
649
650    /// Configuration for version sorting
651    pub fn version() -> SortConfig {
652        SortConfig::new().with_mode(SortMode::Version)
653    }
654
655    /// Configuration for human-readable sizes
656    pub fn human_numeric() -> SortConfig {
657        SortConfig::new().with_mode(SortMode::HumanNumeric)
658    }
659
660    /// Configuration for case-insensitive sorting
661    pub fn case_insensitive() -> SortConfig {
662        let mut config = SortConfig::new();
663        config.ignore_case = true;
664        config
665    }
666
667    /// Configuration for sorting with unique output
668    pub fn unique() -> SortConfig {
669        SortConfig::new().with_unique(true)
670    }
671
672    /// Configuration for reverse sorting
673    pub fn reverse() -> SortConfig {
674        SortConfig::new().with_reverse(true)
675    }
676
677    /// Configuration for stable sorting
678    pub fn stable() -> SortConfig {
679        SortConfig::new().with_stable(true)
680    }
681
682    /// Configuration for merge mode
683    pub fn merge() -> SortConfig {
684        SortConfig::new().with_merge(true)
685    }
686
687    /// Configuration for check mode
688    pub fn check() -> SortConfig {
689        SortConfig::new().with_check(true)
690    }
691}
692
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration is lexicographic with reverse, unique and stable off.
    #[test]
    fn test_default_config() {
        let SortConfig {
            mode,
            reverse,
            unique,
            stable,
            ..
        } = SortConfig::default();

        assert_eq!(mode, SortMode::Lexicographic);
        assert!(!reverse);
        assert!(!unique);
        assert!(!stable);
    }

    /// Options set on the builder show up in the built configuration.
    #[test]
    fn test_config_builder() {
        let builder = SortConfigBuilder::new()
            .mode(SortMode::Numeric)
            .reverse()
            .unique();
        let built = builder.build().expect("Failed to build test config");

        assert_eq!(built.mode, SortMode::Numeric);
        assert!(built.reverse);
        assert!(built.unique);
    }

    /// Known mode names parse; unknown names are rejected.
    #[test]
    fn test_sort_mode_from_str() {
        let numeric: SortMode = "numeric".parse().expect("Failed to parse numeric mode");
        assert_eq!(numeric, SortMode::Numeric);

        let version: SortMode = "version".parse().expect("Failed to parse version mode");
        assert_eq!(version, SortMode::Version);

        let unknown = "invalid".parse::<SortMode>();
        assert!(unknown.is_err());
    }

    /// Check mode and merge mode cannot be combined.
    #[test]
    fn test_validate_conflicting_options() {
        let mut config = SortConfig::default();
        config.check = true;
        config.merge = true;

        assert!(config.validate().is_err());
    }

    /// The effective buffer size falls back to 1 MiB and honours an explicit value.
    #[test]
    fn test_effective_buffer_size() {
        let unset = SortConfig::default();
        assert_eq!(unset.effective_buffer_size(), 1024 * 1024);

        let explicit = SortConfig::default().with_buffer_size(Some(2048));
        assert_eq!(explicit.effective_buffer_size(), 2048);
    }

    /// Presets change the one setting they are named after.
    #[test]
    fn test_presets() {
        assert_eq!(presets::numeric().mode, SortMode::Numeric);
        assert!(presets::reverse().reverse);
        assert!(presets::unique().unique);
    }

    /// No files, or a lone "-", means standard input; a named file does not.
    #[test]
    fn test_reading_from_stdin() {
        let no_files = SortConfig::default();
        assert!(no_files.reading_from_stdin());

        let dash_only = SortConfig::default().with_input_files(vec![String::from("-")]);
        assert!(dash_only.reading_from_stdin());

        let named = SortConfig::default().with_input_files(vec![String::from("file.txt")]);
        assert!(!named.reading_from_stdin());
    }
}