1use std::fs::File;
2use std::io::Error;
3
4use fp_core::{empty::*, monoid::*, semigroup::*};
5use seahorse::{App, Command, Context, Flag, FlagType};
6
7use crate::bytes_iter::BytesIter;
8use crate::constants::*;
9use crate::error::*;
10use crate::util::{is_newline, opt_as_empty_str};
11
12pub fn wc_app() -> App {
13 App::new("wc")
14 .author("Brandon Elam Barker")
15 .action(run_wc_seahorse_action)
16 .command(run_wc_seahorse_cmd())
17}
18
/// Usage text passed to `Command::usage` when the `wc` command is built.
const WC_USAGE: &str = r#"
wc [OPTION] FILE

No option implies lines, words and bytes will be printed.

Valid options are:
-c print the byte counts
-w print the word counts
-l print the line counts

"#;
30
31pub fn run_wc_seahorse_cmd() -> Command {
32 Command::new("wc")
33 .description("wc: line, word, and byte counting")
34 .usage(WC_USAGE)
35 .action(run_wc_seahorse_action)
36 .flag(
37 Flag::new("bytes", FlagType::Bool)
38 .alias("c")
39 .description("wc -c some_file"),
40 )
41 .flag(
42 Flag::new("words", FlagType::Bool)
43 .alias("w")
44 .description("wc -w some_file"),
45 )
46 .flag(
47 Flag::new("lines", FlagType::Bool)
48 .alias("l")
49 .description("wc -l some_file"),
50 )
51}
52
53pub fn run_wc_seahorse_action(ctxt: &Context) {
54 let src = ctxt.args.first().user_err("wc: missing source");
55 let do_bytes = ctxt.bool_flag("bytes");
56 let do_words = ctxt.bool_flag("words");
57 let do_lines = ctxt.bool_flag("lines");
58 let do_all = do_lines && do_words && do_bytes;
59 let do_all = do_all || (!do_lines && !do_words && !do_bytes);
60 let counts: Counts;
61 if do_all {
62 counts = wc_all(src).user_err("Error in wc_all");
63 } else {
64 let mut build_counts = Counts::null();
65 if do_bytes {
66 build_counts = wc_bytes(src)
67 .map(|b| build_counts.bytes(b))
68 .user_err("Error in wc_bytes");
69 }
70 if do_words {
71 build_counts = wc_words(src)
72 .map(|b| build_counts.words(b))
73 .user_err("Error in wc_words");
74 }
75 if do_lines {
76 build_counts = wc_lines(src)
77 .map(|b| build_counts.lines(b))
78 .user_err("Error in wc_lines");
79 }
80 counts = build_counts;
81 }
82 println!("{}", Counts::format(&counts));
83}
84
85pub fn run_wc_lines(src: &str) {
88 let wc_res = wc_lines(src).user_err("Error in wc_lines");
89 println!("{}", Counts::format(&Counts::null().lines(wc_res)));
90}
91
92pub fn wc_lines(src: &str) -> Result<usize, Error> {
93 let f_in =
94 File::open(src).sfw_err(&format!("Couldn't open source: {}", &src))?;
95 wc_lines_file(&f_in)
96}
97
98pub fn wc_lines_file(f_in: &File) -> Result<usize, Error> {
103 BytesIter::new(f_in, DEFAULT_BUF_SIZE)
104 .try_fold(0_usize, |ac_tot, b_slice| {
105 Ok(ac_tot + num_newlines(&b_slice?))
106 })
107}
108
109pub fn run_wc_bytes(src: &str) {
112 let wc_res = wc_bytes(src).user_err("Error in wc_bytes");
113 println!("{} {}", wc_res, &src);
114}
115
116pub fn wc_bytes(src: &str) -> Result<usize, Error> {
117 let f_in =
118 File::open(src).sfw_err(&format!("Couldn't open source: {}", &src))?;
119 wc_bytes_file(&f_in)
120}
121
122pub fn wc_bytes_file(f_in: &File) -> Result<usize, Error> {
123 BytesIter::new(f_in, DEFAULT_BUF_SIZE)
124 .try_fold(0_usize, |ac_tot, b_slice| Ok(ac_tot + b_slice?.len()))
125}
126
127pub fn run_wc_words(src: &str) {
130 let wc_res = wc_words(src).user_err("Error in wc_words");
131 println!("{}", Counts::format(&Counts::null().words(wc_res)));
132}
133
134pub fn wc_words(src: &str) -> Result<usize, Error> {
135 let f_in =
136 File::open(src).sfw_err(&format!("Couldn't open source: {}", &src))?;
137 wc_words_file(&f_in)
138}
139
140pub fn wc_words_file(f_in: &File) -> Result<usize, Error> {
141 BytesIter::new(f_in, DEFAULT_BUF_SIZE)
142 .try_fold(0_usize, |ac_tot, b_slice| {
143 Ok(ac_tot + word_count(b_slice?.as_slice()))
144 })
145}
146
/// Classification of a single byte for word counting.
#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
enum CharType {
    /// The byte is ASCII whitespace.
    IsSpace,
    /// The byte is anything other than ASCII whitespace.
    NotSpace,
}

impl From<&u8> for CharType {
    /// Classifies `byte` using [`u8::is_ascii_whitespace`].
    fn from(byte: &u8) -> Self {
        if !byte.is_ascii_whitespace() {
            return CharType::NotSpace;
        }
        CharType::IsSpace
    }
}
167
168#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
169struct WordCount {
170 current: CharType,
171 count: usize,
172}
173
174impl From<&u8> for WordCount {
175 fn from(other: &u8) -> Self {
176 WordCount {
177 current: CharType::from(other),
178 count: 0,
179 }
180 }
181}
182
183const WORD_COUNT_0: WordCount = WordCount {
184 current: CharType::IsSpace,
185 count: 0,
186};
187
188impl Empty for WordCount {
189 fn empty() -> Self {
190 WORD_COUNT_0
191 }
192}
193impl Semigroup for WordCount {
195 fn combine(self, other: Self) -> Self {
196 let new_count = match other.current {
197 CharType::IsSpace => self.count,
198 CharType::NotSpace => match self.current {
199 CharType::IsSpace => self.count + 1,
200 CharType::NotSpace => self.count,
201 },
202 };
203 WordCount {
204 current: other.current,
205 count: new_count,
206 }
207 }
208}
209impl Monoid for WordCount {}
211
212pub fn word_count(b_slice: &[u8]) -> usize {
213 b_slice
214 .iter()
215 .map(WordCount::from)
216 .fold(Empty::empty(), Semigroup::combine)
217 .count
218}
219
220pub fn num_newlines(b_slice: &[u8]) -> usize {
221 b_slice.iter().fold(
222 0_usize,
223 |ac, bt| {
224 if is_newline(*bt) {
225 ac + 1
226 } else {
227 ac
228 }
229 },
230 )
231}
232
233#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
234pub struct Counts {
235 pub bytes: Option<usize>,
236 pub words: Option<usize>,
237 pub lines: Option<usize>,
238}
239
240impl Counts {
241 const fn new(bytes: usize, words: usize, lines: usize) -> Self {
242 Counts {
243 bytes: Some(bytes),
244 words: Some(words),
245 lines: Some(lines),
246 }
247 }
248 const fn empty() -> Self {
249 Self::new(0, 0, 0)
250 }
251 const fn null() -> Self {
252 Counts {
253 bytes: None,
254 words: None,
255 lines: None,
256 }
257 }
258 const fn bytes(self, bytes: usize) -> Self {
259 Counts {
260 bytes: Some(bytes),
261 ..self
262 }
263 }
264 const fn words(self, words: usize) -> Self {
265 Counts {
266 words: Some(words),
267 ..self
268 }
269 }
270 const fn lines(self, lines: usize) -> Self {
271 Counts {
272 lines: Some(lines),
273 ..self
274 }
275 }
276 fn format(&self) -> String {
278 let b_str = opt_as_empty_str(self.bytes);
279 let w_str = opt_as_empty_str(self.words);
280 let l_str = opt_as_empty_str(self.lines);
281 format!("{} {} {}", l_str, w_str, b_str)
282 }
283}
284
285#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
291struct Flux {
292 pub left_char_type: CharType,
294 pub bytes: usize,
296 pub words: usize,
298 pub lines: usize,
300 pub right_char_type: CharType,
302}
303
304impl Flux {
305 fn new(
307 left_char_type: CharType,
308 bytes: usize,
309 words: usize,
310 lines: usize,
311 right_char_type: CharType,
312 ) -> Self {
313 Flux {
314 left_char_type,
315 bytes,
316 words,
317 lines,
318 right_char_type,
319 }
320 }
321
322 fn span(self, rhs: Flux) -> Self {
324 let words = {
325 if let (CharType::NotSpace, CharType::NotSpace) =
328 (self.right_char_type, rhs.left_char_type)
329 {
330 self.words + rhs.words - 1
331 } else {
332 self.words + rhs.words
333 }
334 };
335
336 Flux::new(
337 self.left_char_type,
338 self.bytes + rhs.bytes,
339 words,
340 self.lines + rhs.lines,
341 rhs.right_char_type,
342 )
343 }
344}
345
346#[derive(Copy, Clone, Eq, PartialEq, Debug)]
347enum FluxMay {
348 FluxSome(Flux),
349 FluxEmpty,
350}
351use FluxMay::*;
352
353impl FluxMay {
354 fn new(
356 left_char_type: CharType,
357 bytes: usize,
358 words: usize,
359 lines: usize,
360 right_char_type: CharType,
361 ) -> Self {
362 FluxMay::FluxSome(Flux::new(
363 left_char_type,
364 bytes,
365 words,
366 lines,
367 right_char_type,
368 ))
369 }
370
371 fn counts(&self) -> Counts {
372 match self {
373 FluxSome(flux) => Counts::new(flux.bytes, flux.words, flux.lines),
374 FluxEmpty => Counts::empty(),
375 }
376 }
377}
378
379impl Empty for FluxMay {
380 fn empty() -> Self {
381 FluxMay::FluxEmpty
382 }
383}
384impl Semigroup for FluxMay {
386 fn combine(self, other: Self) -> Self {
387 match other {
388 FluxEmpty => self,
389 FluxSome(other_flux) => match self {
390 FluxEmpty => other,
391 FluxSome(self_flux) => {
392 FluxSome(Flux::span(self_flux, other_flux))
393 }
394 },
395 }
396 }
397}
398impl Monoid for FluxMay {}
400
401impl From<&[u8]> for FluxMay {
402 fn from(buf: &[u8]) -> Self {
404 if buf.is_empty() {
405 FluxMay::FluxEmpty
406 } else {
407 let lines = num_newlines(buf);
410 let first_char = CharType::from(buf.first().unwrap_or(&b' '));
411 let last_char = CharType::from(buf.last().unwrap_or(&b' '));
412
413 FluxMay::new(
414 first_char,
415 buf.len(),
416 word_count(buf),
417 lines,
418 last_char,
419 )
420 }
421 }
422}
423
424pub fn run_wc_all(src: &str) {
427 let wc_res = wc_all(src).user_err("Error in wc_all");
428 println!("{}", Counts::format(&wc_res));
429}
430
431pub fn wc_all(src: &str) -> Result<Counts, Error> {
432 let f_in =
433 File::open(src).sfw_err(&format!("Couldn't open source: {}", &src))?;
434 wc_all_file(&f_in)
435}
436
437pub fn wc_all_file(f_in: &File) -> Result<Counts, Error> {
438 BytesIter::new(f_in, DEFAULT_BUF_SIZE)
439 .try_fold(FluxEmpty, |flux_may, b_slice| {
440 Ok(Semigroup::combine(
441 flux_may,
442 FluxMay::from(b_slice?.as_slice()),
443 ))
444 })
445 .map(|f| FluxMay::counts(&f))
446}
447
#[cfg(test)]
mod tests {
    use super::*;

    /// Leading, trailing and embedded whitespace (including newlines) must
    /// not change the number of words found.
    #[test]
    fn test_word_count_over_byte_string() {
        let samples = [
            "testing one\ntwo three",
            "testing one\ntwo three\n",
            "\ntesting one\ntwo three",
            " testing one two three\n ",
        ];
        for text in samples.iter() {
            assert_eq!(word_count(text.as_bytes()), 4, "input: {:?}", text);
        }
    }

    /// A non-empty slice is summarised with its edge classifications and
    /// totals.
    #[test]
    fn test_flux_may_from() {
        let parsed = FluxMay::from("testing one two three ".as_bytes());
        let expected =
            FluxMay::new(CharType::NotSpace, 22, 4, 0, CharType::IsSpace);
        assert_eq!(parsed, expected);
    }

    /// A word cut in half by the split ("on" + "e") is counted once.
    #[test]
    fn test_flux_may_combine() {
        let left = FluxMay::from("testing on".as_bytes());
        let right = FluxMay::from("e two three".as_bytes());
        let expected =
            FluxMay::new(CharType::NotSpace, 21, 4, 0, CharType::NotSpace);
        assert_eq!(Semigroup::combine(left, right), expected);
    }

    /// Whitespace on both sides of the split leaves the word total a plain
    /// sum.
    #[test]
    fn test_flux_may_combine_space() {
        let left = FluxMay::from("testing one ".as_bytes());
        let right = FluxMay::from(" two three".as_bytes());
        let expected =
            FluxMay::new(CharType::NotSpace, 22, 4, 0, CharType::NotSpace);
        assert_eq!(Semigroup::combine(left, right), expected);
    }
}
511}