word_tally/options/mod.rs
//! Configuration options for word tallying.
//!
//! The [`Options`] struct provides a builder API for configuring word tallying behavior.
//!
//! # Common Patterns
//!
//! ```
//! use word_tally::{Case, Filters, Io, Options, Serialization};
//!
//! // Automatic I/O selection (default)
//! let auto = Options::default();
//!
//! // Fast processing of large files
//! let fast = Options::default().with_io(Io::ParallelMmap);
//!
//! // Memory-constrained environment
//! let low_memory = Options::default().with_io(Io::Stream);
//!
//! // Case-insensitive frequency analysis
//! let frequency = Options::default()
//!     .with_case(Case::Lower)
//!     .with_filters(Filters::default().with_min_count(2));
//!
//! // Export for data analysis
//! let export = Options::default()
//!     .with_serialization(Serialization::Csv)
//!     .with_filters(Filters::default().with_min_chars(4));
//! ```
//!
//! # Components
//!
//! - [`Case`] - Word case normalization
//! - [`Sort`] - Result ordering
//! - [`Serialization`] - Output format
//! - [`Filters`] - Word filtering rules
//! - [`Io`] - I/O strategy
//! - [`Performance`] - Performance tuning
//!
//! # Environment Variables
//!
//! - `WORD_TALLY_IO` (default: `parallel-stream`)
//! - `WORD_TALLY_THREADS` (default: all cores)
//! - `WORD_TALLY_CHUNK_SIZE` (default: 65536)
//! - `WORD_TALLY_UNIQUENESS_RATIO` (default: 32)
//! - `WORD_TALLY_WORDS_PER_KB` (default: 128)
//! - `WORD_TALLY_STDIN_BUFFER_SIZE` (default: 262144)
47
48pub mod case;
49pub mod delimiters;
50pub mod filters;
51pub mod io;
52pub mod patterns;
53pub mod performance;
54pub mod serialization;
55pub mod sort;
56pub mod threads;
57
58use core::fmt::{self, Display, Formatter};
59
60use serde::{Deserialize, Serialize};
61
62use self::{
63 case::Case, filters::Filters, io::Io, performance::Performance, serialization::Serialization,
64 sort::Sort,
65};
66use crate::WordTallyError;
67
68/// Unified configuration for word tallying operations.
69///
70/// `Options` consolidates all configuration aspects of word tallying into a single structure.
71#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
72#[serde(rename_all = "camelCase")]
73pub struct Options {
74 /// Case handling strategy (original, lower, upper).
75 case: Case,
76
77 /// Sort order for results (unsorted, ascending, descending).
78 sort: Sort,
79
80 /// Serialization configuration (output format, delimiter).
81 serialization: Serialization,
82
83 /// Filter settings (word length, frequency, patterns, exclusions).
84 filters: Filters,
85
86 /// I/O strategy (sequential, streamed, in-memory, memory-mapped).
87 io: Io,
88
89 /// Performance tuning configuration (threads, memory allocation, chunk size).
90 performance: Performance,
91}
92
93impl Options {
94 /// Creates a new `Options` with custom case, sort, serializer, filters, and performance
95 /// configurations.
96 ///
97 /// # Examples
98 ///
99 /// ```
100 /// use word_tally::{Case, Filters, Io, Options, Performance, Serialization, Sort};
101 ///
102 /// // Default configuration
103 /// let options = Options::default();
104 /// assert_eq!(options.io(), Io::Auto);
105 ///
106 /// // Targeted customization with builder methods
107 /// let options = Options::default()
108 /// .with_case(Case::Lower)
109 /// .with_serialization(Serialization::Json);
110 /// assert_eq!(options.serialization(), &Serialization::Json);
111 /// ```
112 #[must_use]
113 pub const fn new(
114 case: Case,
115 sort: Sort,
116 serialization: Serialization,
117 filters: Filters,
118 io: Io,
119 performance: Performance,
120 ) -> Self {
121 Self {
122 case,
123 sort,
124 serialization,
125 filters,
126 io,
127 performance,
128 }
129 }
130
131 /// Set case handling strategy.
132 #[must_use]
133 pub const fn with_case(mut self, case: Case) -> Self {
134 self.case = case;
135 self
136 }
137
138 /// Set sort order.
139 #[must_use]
140 pub const fn with_sort(mut self, sort: Sort) -> Self {
141 self.sort = sort;
142 self
143 }
144
145 /// Set serialization options while preserving other options.
146 #[must_use]
147 pub fn with_serialization(mut self, serialization: Serialization) -> Self {
148 self.serialization = serialization;
149 self
150 }
151
152 /// Set filters while preserving other options.
153 #[must_use]
154 pub fn with_filters(mut self, filters: Filters) -> Self {
155 self.filters = filters;
156 self
157 }
158
159 /// Set performance configuration while preserving other options.
160 #[must_use]
161 pub const fn with_performance(mut self, performance: Performance) -> Self {
162 self.performance = performance;
163 self
164 }
165
166 /// Set I/O strategy.
167 #[must_use]
168 pub const fn with_io(mut self, io: Io) -> Self {
169 self.io = io;
170 self
171 }
172
173 /// Get the case normalization setting.
174 #[must_use]
175 pub const fn case(&self) -> Case {
176 self.case
177 }
178
179 /// Get the word sorting setting.
180 #[must_use]
181 pub const fn sort(&self) -> Sort {
182 self.sort
183 }
184
185 /// Get a reference to the serialization options.
186 #[must_use]
187 pub const fn serialization(&self) -> &Serialization {
188 &self.serialization
189 }
190
191 /// Get a reference to the filters.
192 #[must_use]
193 pub const fn filters(&self) -> &Filters {
194 &self.filters
195 }
196
197 /// Get a reference to the performance configuration.
198 #[must_use]
199 pub const fn performance(&self) -> &Performance {
200 &self.performance
201 }
202
203 /// Get the I/O strategy.
204 #[must_use]
205 pub const fn io(&self) -> Io {
206 self.io
207 }
208
209 /// Initialize the thread pool if using a parallel I/O mode.
210 ///
211 /// This method initializes the global thread pool when using parallel I/O modes
212 /// (streamed, in-memory, or memory-mapped). For sequential mode, this is a no-op.
213 ///
214 /// # Errors
215 ///
216 /// Returns an error if a parallel I/O mode is selected but the thread pool
217 /// cannot be initialized.
218 pub fn init_thread_pool_if_parallel(&self) -> Result<(), WordTallyError> {
219 match self.io {
220 Io::Stream => Ok(()),
221 Io::Auto
222 | Io::ParallelStream
223 | Io::ParallelInMemory
224 | Io::ParallelMmap
225 | Io::ParallelBytes => self.performance.threads().init_pool(),
226 }
227 }
228}
229
230impl Display for Options {
231 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
232 write!(
233 f,
234 "Options {{ case: {}, sort: {}, serialization: {}, filters: {:?}, io: {} }}",
235 self.case, self.sort, self.serialization, self.filters, self.io
236 )
237 }
238}
239
240impl AsRef<Serialization> for Options {
241 fn as_ref(&self) -> &Serialization {
242 &self.serialization
243 }
244}
245
246impl AsRef<Filters> for Options {
247 fn as_ref(&self) -> &Filters {
248 &self.filters
249 }
250}
251
252impl AsRef<Performance> for Options {
253 fn as_ref(&self) -> &Performance {
254 &self.performance
255 }
256}