word_tally/options/
mod.rs

1//! Configuration options for word tallying.
2//!
3//! The [`Options`] struct provides a builder API for configuring word tallying behavior.
4//!
5//! # Common Patterns
6//!
7//! ```
8//! use word_tally::{Case, Filters, Io, Options, Serialization};
9//!
10//! // Automatic I/O selection (default)
11//! let auto = Options::default();
12//!
13//! // Fast processing of large files
14//! let fast = Options::default().with_io(Io::ParallelMmap);
15//!
16//! // Memory-constrained environment
17//! let low_memory = Options::default().with_io(Io::Stream);
18//!
19//! // Case-insensitive frequency analysis
20//! let frequency = Options::default()
21//!     .with_case(Case::Lower)
22//!     .with_filters(Filters::default().with_min_count(2));
23//!
24//! // Export for data analysis
25//! let export = Options::default()
26//!     .with_serialization(Serialization::Csv)
27//!     .with_filters(Filters::default().with_min_chars(4));
28//! ```
29//!
30//! # Components
31//!
32//! - [`Case`] - Word case normalization
33//! - [`Sort`] - Result ordering
34//! - [`Serialization`] - Output format
35//! - [`Filters`] - Word filtering rules
36//! - [`Io`] - I/O strategy
37//! - [`Performance`] - Performance tuning
38//!
39//! # Environment Variables
40//!
41//! - `WORD_TALLY_IO` (default: `parallel-stream`)
42//! - `WORD_TALLY_THREADS` (default: all cores)
43//! - `WORD_TALLY_CHUNK_SIZE` (default: 65536)
44//! - `WORD_TALLY_UNIQUENESS_RATIO` (default: 32)
45//! - `WORD_TALLY_WORDS_PER_KB` (default: 128)
46//! - `WORD_TALLY_STDIN_BUFFER_SIZE` (default: 262144)
47
48pub mod case;
49pub mod delimiters;
50pub mod filters;
51pub mod io;
52pub mod patterns;
53pub mod performance;
54pub mod serialization;
55pub mod sort;
56pub mod threads;
57
58use core::fmt::{self, Display, Formatter};
59
60use serde::{Deserialize, Serialize};
61
62use self::{
63    case::Case, filters::Filters, io::Io, performance::Performance, serialization::Serialization,
64    sort::Sort,
65};
66use crate::WordTallyError;
67
68/// Unified configuration for word tallying operations.
69///
70/// `Options` consolidates all configuration aspects of word tallying into a single structure.
71#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
72#[serde(rename_all = "camelCase")]
73pub struct Options {
74    /// Case handling strategy (original, lower, upper).
75    case: Case,
76
77    /// Sort order for results (unsorted, ascending, descending).
78    sort: Sort,
79
80    /// Serialization configuration (output format, delimiter).
81    serialization: Serialization,
82
83    /// Filter settings (word length, frequency, patterns, exclusions).
84    filters: Filters,
85
86    /// I/O strategy (sequential, streamed, in-memory, memory-mapped).
87    io: Io,
88
89    /// Performance tuning configuration (threads, memory allocation, chunk size).
90    performance: Performance,
91}
92
93impl Options {
94    /// Creates a new `Options` with custom case, sort, serializer, filters, and performance
95    /// configurations.
96    ///
97    /// # Examples
98    ///
99    /// ```
100    /// use word_tally::{Case, Filters, Io, Options, Performance, Serialization, Sort};
101    ///
102    /// // Default configuration
103    /// let options = Options::default();
104    /// assert_eq!(options.io(), Io::Auto);
105    ///
106    /// // Targeted customization with builder methods
107    /// let options = Options::default()
108    ///     .with_case(Case::Lower)
109    ///     .with_serialization(Serialization::Json);
110    /// assert_eq!(options.serialization(), &Serialization::Json);
111    /// ```
112    #[must_use]
113    pub const fn new(
114        case: Case,
115        sort: Sort,
116        serialization: Serialization,
117        filters: Filters,
118        io: Io,
119        performance: Performance,
120    ) -> Self {
121        Self {
122            case,
123            sort,
124            serialization,
125            filters,
126            io,
127            performance,
128        }
129    }
130
131    /// Set case handling strategy.
132    #[must_use]
133    pub const fn with_case(mut self, case: Case) -> Self {
134        self.case = case;
135        self
136    }
137
138    /// Set sort order.
139    #[must_use]
140    pub const fn with_sort(mut self, sort: Sort) -> Self {
141        self.sort = sort;
142        self
143    }
144
145    /// Set serialization options while preserving other options.
146    #[must_use]
147    pub fn with_serialization(mut self, serialization: Serialization) -> Self {
148        self.serialization = serialization;
149        self
150    }
151
152    /// Set filters while preserving other options.
153    #[must_use]
154    pub fn with_filters(mut self, filters: Filters) -> Self {
155        self.filters = filters;
156        self
157    }
158
159    /// Set performance configuration while preserving other options.
160    #[must_use]
161    pub const fn with_performance(mut self, performance: Performance) -> Self {
162        self.performance = performance;
163        self
164    }
165
166    /// Set I/O strategy.
167    #[must_use]
168    pub const fn with_io(mut self, io: Io) -> Self {
169        self.io = io;
170        self
171    }
172
173    /// Get the case normalization setting.
174    #[must_use]
175    pub const fn case(&self) -> Case {
176        self.case
177    }
178
179    /// Get the word sorting setting.
180    #[must_use]
181    pub const fn sort(&self) -> Sort {
182        self.sort
183    }
184
185    /// Get a reference to the serialization options.
186    #[must_use]
187    pub const fn serialization(&self) -> &Serialization {
188        &self.serialization
189    }
190
191    /// Get a reference to the filters.
192    #[must_use]
193    pub const fn filters(&self) -> &Filters {
194        &self.filters
195    }
196
197    /// Get a reference to the performance configuration.
198    #[must_use]
199    pub const fn performance(&self) -> &Performance {
200        &self.performance
201    }
202
203    /// Get the I/O strategy.
204    #[must_use]
205    pub const fn io(&self) -> Io {
206        self.io
207    }
208
209    /// Initialize the thread pool if using a parallel I/O mode.
210    ///
211    /// This method initializes the global thread pool when using parallel I/O modes
212    /// (streamed, in-memory, or memory-mapped). For sequential mode, this is a no-op.
213    ///
214    /// # Errors
215    ///
216    /// Returns an error if a parallel I/O mode is selected but the thread pool
217    /// cannot be initialized.
218    pub fn init_thread_pool_if_parallel(&self) -> Result<(), WordTallyError> {
219        match self.io {
220            Io::Stream => Ok(()),
221            Io::Auto
222            | Io::ParallelStream
223            | Io::ParallelInMemory
224            | Io::ParallelMmap
225            | Io::ParallelBytes => self.performance.threads().init_pool(),
226        }
227    }
228}
229
230impl Display for Options {
231    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
232        write!(
233            f,
234            "Options {{ case: {}, sort: {}, serialization: {}, filters: {:?}, io: {} }}",
235            self.case, self.sort, self.serialization, self.filters, self.io
236        )
237    }
238}
239
240impl AsRef<Serialization> for Options {
241    fn as_ref(&self) -> &Serialization {
242        &self.serialization
243    }
244}
245
246impl AsRef<Filters> for Options {
247    fn as_ref(&self) -> &Filters {
248        &self.filters
249    }
250}
251
252impl AsRef<Performance> for Options {
253    fn as_ref(&self) -> &Performance {
254        &self.performance
255    }
256}