// sfwtools/counting.rs

1use std::fs::File;
2use std::io::Error;
3
4use fp_core::{empty::*, monoid::*, semigroup::*};
5use seahorse::{App, Command, Context, Flag, FlagType};
6
7use crate::bytes_iter::BytesIter;
8use crate::constants::*;
9use crate::error::*;
10use crate::util::{is_newline, opt_as_empty_str};
11
12pub fn wc_app() -> App {
13    App::new("wc")
14        .author("Brandon Elam Barker")
15        .action(run_wc_seahorse_action)
16        .command(run_wc_seahorse_cmd())
17}
18
/// Usage text for the `wc` command, handed to `Command::usage` when the
/// command is built. The leading and trailing blank lines are part of the
/// text.
const WC_USAGE: &str = r#"
wc [OPTION] FILE

No option implies lines, words and bytes will be printed.

Valid options are:
-c            print the byte counts
-w            print the word counts
-l            print the line counts

"#;
30
31pub fn run_wc_seahorse_cmd() -> Command {
32    Command::new("wc")
33        .description("wc: line, word, and byte counting")
34        .usage(WC_USAGE)
35        .action(run_wc_seahorse_action)
36        .flag(
37            Flag::new("bytes", FlagType::Bool)
38                .alias("c")
39                .description("wc -c some_file"),
40        )
41        .flag(
42            Flag::new("words", FlagType::Bool)
43                .alias("w")
44                .description("wc -w some_file"),
45        )
46        .flag(
47            Flag::new("lines", FlagType::Bool)
48                .alias("l")
49                .description("wc -l some_file"),
50        )
51}
52
53pub fn run_wc_seahorse_action(ctxt: &Context) {
54    let src = ctxt.args.first().user_err("wc: missing source");
55    let do_bytes = ctxt.bool_flag("bytes");
56    let do_words = ctxt.bool_flag("words");
57    let do_lines = ctxt.bool_flag("lines");
58    let do_all = do_lines && do_words && do_bytes;
59    let do_all = do_all || (!do_lines && !do_words && !do_bytes);
60    let counts: Counts;
61    if do_all {
62        counts = wc_all(src).user_err("Error in wc_all");
63    } else {
64        let mut build_counts = Counts::null();
65        if do_bytes {
66            build_counts = wc_bytes(src)
67                .map(|b| build_counts.bytes(b))
68                .user_err("Error in wc_bytes");
69        }
70        if do_words {
71            build_counts = wc_words(src)
72                .map(|b| build_counts.words(b))
73                .user_err("Error in wc_words");
74        }
75        if do_lines {
76            build_counts = wc_lines(src)
77                .map(|b| build_counts.lines(b))
78                .user_err("Error in wc_lines");
79        }
80        counts = build_counts;
81    }
82    println!("{}", Counts::format(&counts));
83}
84
85/// Convenience function for running wc in idiomatic fashion
86/// (i.e.) errors are printed to user and the program exits.
87pub fn run_wc_lines(src: &str) {
88    let wc_res = wc_lines(src).user_err("Error in wc_lines");
89    println!("{}", Counts::format(&Counts::null().lines(wc_res)));
90}
91
92pub fn wc_lines(src: &str) -> Result<usize, Error> {
93    let f_in =
94        File::open(src).sfw_err(&format!("Couldn't open source: {}", &src))?;
95    wc_lines_file(&f_in)
96}
97
98/// In Chapter 1, page 15 of Software Tools, the authors discuss the
99/// hazards of boundary conditions in programming. Certainly this is still
100/// a problem in Rust, but using Rust's functional programming facilities,
101/// and types can help to greatly reduce the occurrence of such errors.
102pub fn wc_lines_file(f_in: &File) -> Result<usize, Error> {
103    BytesIter::new(f_in, DEFAULT_BUF_SIZE)
104        .try_fold(0_usize, |ac_tot, b_slice| {
105            Ok(ac_tot + num_newlines(&b_slice?))
106        })
107}
108
109/// Convenience function for running wc in idiomatic fashion
110/// (i.e.) errors are printed to user and the program exits.
111pub fn run_wc_bytes(src: &str) {
112    let wc_res = wc_bytes(src).user_err("Error in wc_bytes");
113    println!("{} {}", wc_res, &src);
114}
115
116pub fn wc_bytes(src: &str) -> Result<usize, Error> {
117    let f_in =
118        File::open(src).sfw_err(&format!("Couldn't open source: {}", &src))?;
119    wc_bytes_file(&f_in)
120}
121
122pub fn wc_bytes_file(f_in: &File) -> Result<usize, Error> {
123    BytesIter::new(f_in, DEFAULT_BUF_SIZE)
124        .try_fold(0_usize, |ac_tot, b_slice| Ok(ac_tot + b_slice?.len()))
125}
126
127/// Convenience function for running wc in idiomatic fashion
128/// (i.e.) errors are printed to user and the program exits.
129pub fn run_wc_words(src: &str) {
130    let wc_res = wc_words(src).user_err("Error in wc_words");
131    println!("{}", Counts::format(&Counts::null().words(wc_res)));
132}
133
134pub fn wc_words(src: &str) -> Result<usize, Error> {
135    let f_in =
136        File::open(src).sfw_err(&format!("Couldn't open source: {}", &src))?;
137    wc_words_file(&f_in)
138}
139
140pub fn wc_words_file(f_in: &File) -> Result<usize, Error> {
141    BytesIter::new(f_in, DEFAULT_BUF_SIZE)
142        .try_fold(0_usize, |ac_tot, b_slice| {
143            Ok(ac_tot + word_count(b_slice?.as_slice()))
144        })
145}
146
/// Classification of a single byte for word-boundary detection.
#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
enum CharType {
    /// The byte is an ASCII whitespace separator.
    IsSpace,
    /// The byte is anything other than an ASCII whitespace separator.
    NotSpace,
}

impl From<&u8> for CharType {
    /// Classifies `other` using [`u8::is_ascii_whitespace`].
    ///
    /// A line feed counts as whitespace under that predicate; a vertical
    /// tab (`0x0B`) does not.
    fn from(other: &u8) -> Self {
        match other.is_ascii_whitespace() {
            true => Self::IsSpace,
            false => Self::NotSpace,
        }
    }
}
167
/// Running state of a byte-by-byte word count.
#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
struct WordCount {
    /// Class of the most recently consumed byte.
    current: CharType,
    /// Number of words started so far.
    count: usize,
}
173
174impl From<&u8> for WordCount {
175    fn from(other: &u8) -> Self {
176        WordCount {
177            current: CharType::from(other),
178            count: 0,
179        }
180    }
181}
182
/// Initial word-count state: zero words, with the "previous byte" treated as
/// whitespace so that a leading non-whitespace byte starts the first word.
const WORD_COUNT_0: WordCount = WordCount {
    current: CharType::IsSpace,
    count: 0,
};
187
impl Empty for WordCount {
    /// The starting state for a fold over bytes: [`WORD_COUNT_0`].
    fn empty() -> Self {
        WORD_COUNT_0
    }
}
193//
194impl Semigroup for WordCount {
195    fn combine(self, other: Self) -> Self {
196        let new_count = match other.current {
197            CharType::IsSpace => self.count,
198            CharType::NotSpace => match self.current {
199                CharType::IsSpace => self.count + 1,
200                CharType::NotSpace => self.count,
201            },
202        };
203        WordCount {
204            current: other.current,
205            count: new_count,
206        }
207    }
208}
209//
// Empty impl body: no `Monoid` items are defined or overridden here.
impl Monoid for WordCount {}
211
212pub fn word_count(b_slice: &[u8]) -> usize {
213    b_slice
214        .iter()
215        .map(WordCount::from)
216        .fold(Empty::empty(), Semigroup::combine)
217        .count
218}
219
220pub fn num_newlines(b_slice: &[u8]) -> usize {
221    b_slice.iter().fold(
222        0_usize,
223        |ac, bt| {
224            if is_newline(*bt) {
225                ac + 1
226            } else {
227                ac
228            }
229        },
230    )
231}
232
233#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
234pub struct Counts {
235    pub bytes: Option<usize>,
236    pub words: Option<usize>,
237    pub lines: Option<usize>,
238}
239
240impl Counts {
241    const fn new(bytes: usize, words: usize, lines: usize) -> Self {
242        Counts {
243            bytes: Some(bytes),
244            words: Some(words),
245            lines: Some(lines),
246        }
247    }
248    const fn empty() -> Self {
249        Self::new(0, 0, 0)
250    }
251    const fn null() -> Self {
252        Counts {
253            bytes: None,
254            words: None,
255            lines: None,
256        }
257    }
258    const fn bytes(self, bytes: usize) -> Self {
259        Counts {
260            bytes: Some(bytes),
261            ..self
262        }
263    }
264    const fn words(self, words: usize) -> Self {
265        Counts {
266            words: Some(words),
267            ..self
268        }
269    }
270    const fn lines(self, lines: usize) -> Self {
271        Counts {
272            lines: Some(lines),
273            ..self
274        }
275    }
276    //TODO: const
277    fn format(&self) -> String {
278        let b_str = opt_as_empty_str(self.bytes);
279        let w_str = opt_as_empty_str(self.words);
280        let l_str = opt_as_empty_str(self.lines);
281        format!("{} {} {}", l_str, w_str, b_str)
282    }
283}
284
285/// Representation of a chunk of text.
286///
287/// All of the Flux-based code below is inspired
288/// [by Martin Mroz](https://github.com/martinmroz/wc_rs)
289/// The result of the `wc` operation.
290#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
291struct Flux {
292    /// The type of the left-most character in the chunk.
293    pub left_char_type: CharType,
294    /// The number of bytes in the chunk.
295    pub bytes: usize,
296    /// The number of words in the chunk.
297    pub words: usize,
298    /// The number of lines in the chunk.
299    pub lines: usize,
300    /// The type of the right-most character in the chunk.
301    pub right_char_type: CharType,
302}
303
304impl Flux {
305    /// Returns a new instance of the receiver with the provided parameters.
306    fn new(
307        left_char_type: CharType,
308        bytes: usize,
309        words: usize,
310        lines: usize,
311        right_char_type: CharType,
312    ) -> Self {
313        Flux {
314            left_char_type,
315            bytes,
316            words,
317            lines,
318            right_char_type,
319        }
320    }
321
322    /// Returns a new Flux spanning the receiver on the left, and `rhs` on the right.
323    fn span(self, rhs: Flux) -> Self {
324        let words = {
325            // If the span is formed along a non-space to non-space
326            // boundary the word count is one less than the sum.
327            if let (CharType::NotSpace, CharType::NotSpace) =
328                (self.right_char_type, rhs.left_char_type)
329            {
330                self.words + rhs.words - 1
331            } else {
332                self.words + rhs.words
333            }
334        };
335
336        Flux::new(
337            self.left_char_type,
338            self.bytes + rhs.bytes,
339            words,
340            self.lines + rhs.lines,
341            rhs.right_char_type,
342        )
343    }
344}
345
346#[derive(Copy, Clone, Eq, PartialEq, Debug)]
347enum FluxMay {
348    FluxSome(Flux),
349    FluxEmpty,
350}
351use FluxMay::*;
352
353impl FluxMay {
354    /// Returns a new instance of the receiver with the provided parameters.
355    fn new(
356        left_char_type: CharType,
357        bytes: usize,
358        words: usize,
359        lines: usize,
360        right_char_type: CharType,
361    ) -> Self {
362        FluxMay::FluxSome(Flux::new(
363            left_char_type,
364            bytes,
365            words,
366            lines,
367            right_char_type,
368        ))
369    }
370
371    fn counts(&self) -> Counts {
372        match self {
373            FluxSome(flux) => Counts::new(flux.bytes, flux.words, flux.lines),
374            FluxEmpty => Counts::empty(),
375        }
376    }
377}
378
379impl Empty for FluxMay {
380    fn empty() -> Self {
381        FluxMay::FluxEmpty
382    }
383}
384//
385impl Semigroup for FluxMay {
386    fn combine(self, other: Self) -> Self {
387        match other {
388            FluxEmpty => self,
389            FluxSome(other_flux) => match self {
390                FluxEmpty => other,
391                FluxSome(self_flux) => {
392                    FluxSome(Flux::span(self_flux, other_flux))
393                }
394            },
395        }
396    }
397}
398//
// Empty impl body: no `Monoid` items are defined or overridden here.
impl Monoid for FluxMay {}
400
401impl From<&[u8]> for FluxMay {
402    /// Creates a new instance of a Flux encoding a buffer.
403    fn from(buf: &[u8]) -> Self {
404        if buf.is_empty() {
405            FluxMay::FluxEmpty
406        } else {
407            // A line-feed is considered an ASCII whitespace
408            // character by `is_ascii_whitespace`.
409            let lines = num_newlines(buf);
410            let first_char = CharType::from(buf.first().unwrap_or(&b' '));
411            let last_char = CharType::from(buf.last().unwrap_or(&b' '));
412
413            FluxMay::new(
414                first_char,
415                buf.len(),
416                word_count(buf),
417                lines,
418                last_char,
419            )
420        }
421    }
422}
423
424/// Convenience function for running wc in idiomatic fashion
425/// (i.e.) errors are printed to user and the program exits.
426pub fn run_wc_all(src: &str) {
427    let wc_res = wc_all(src).user_err("Error in wc_all");
428    println!("{}", Counts::format(&wc_res));
429}
430
431pub fn wc_all(src: &str) -> Result<Counts, Error> {
432    let f_in =
433        File::open(src).sfw_err(&format!("Couldn't open source: {}", &src))?;
434    wc_all_file(&f_in)
435}
436
437pub fn wc_all_file(f_in: &File) -> Result<Counts, Error> {
438    BytesIter::new(f_in, DEFAULT_BUF_SIZE)
439        .try_fold(FluxEmpty, |flux_may, b_slice| {
440            Ok(Semigroup::combine(
441                flux_may,
442                FluxMay::from(b_slice?.as_slice()),
443            ))
444        })
445        .map(|f| FluxMay::counts(&f))
446}
447
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `FluxSome` value a test expects.
    fn expected_flux(
        left: CharType,
        bytes: usize,
        words: usize,
        lines: usize,
        right: CharType,
    ) -> FluxMay {
        FluxSome(Flux::new(left, bytes, words, lines, right))
    }

    /// Four words regardless of leading, trailing or repeated whitespace.
    #[test]
    fn test_word_count_over_byte_string() {
        let four_word_inputs = [
            "testing one\ntwo three",
            "testing one\ntwo three\n",
            "\ntesting one\ntwo three",
            " testing one  two three\n  ",
        ];
        for text in four_word_inputs.iter() {
            assert_eq!(word_count(text.as_bytes()), 4);
        }
    }

    /// A single buffer is summarized with its end character classes.
    #[test]
    fn test_flux_may_from() {
        let actual = FluxMay::from("testing one two three ".as_bytes());
        let expected =
            expected_flux(CharType::NotSpace, 22, 4, 0, CharType::IsSpace);
        assert_eq!(actual, expected);
    }

    /// Joining in the middle of a word must not count that word twice.
    #[test]
    fn test_flux_may_combine() {
        let left = FluxMay::from("testing on".as_bytes());
        let right = FluxMay::from("e two three".as_bytes());
        let expected =
            expected_flux(CharType::NotSpace, 21, 4, 0, CharType::NotSpace);
        assert_eq!(Semigroup::combine(left, right), expected);
    }

    /// Joining on whitespace simply adds the word counts.
    #[test]
    fn test_flux_may_combine_space() {
        let left = FluxMay::from("testing one ".as_bytes());
        let right = FluxMay::from(" two three".as_bytes());
        let expected =
            expected_flux(CharType::NotSpace, 22, 4, 0, CharType::NotSpace);
        assert_eq!(Semigroup::combine(left, right), expected);
    }
}
509        );
510    }
511}