tuc/
cut_str.rs

1use anyhow::Result;
2use bstr::ByteSlice;
3use bstr::io::BufReadExt;
4use std::io::{BufRead, Write};
5
6use crate::bounds::{BoundOrFiller, BoundsType, UserBoundsList};
7use crate::finders::common::DelimiterFinder;
8use crate::options::{EOL, Opt, Trim};
9use crate::plan::FieldPlan;
10
11#[cfg(feature = "regex")]
12use regex::bytes::Regex;
13
14fn compress_delimiter(line: &[u8], delimiter: &[u8], output: &mut Vec<u8>) {
15    output.clear();
16    let mut prev_idx = 0;
17
18    for idx in line.find_iter(delimiter) {
19        let prev_part = &line[prev_idx..idx];
20
21        if idx == 0 {
22            output.extend(delimiter);
23        } else if !prev_part.is_empty() {
24            output.extend(prev_part);
25            output.extend(delimiter);
26        }
27
28        prev_idx = idx + delimiter.len();
29    }
30
31    if prev_idx < line.len() {
32        output.extend(&line[prev_idx..]);
33    }
34}
35
/// Replaces every match of `re` found in `line` with `new_delimiter`.
///
/// `new_delimiter` is inserted verbatim: it is wrapped in
/// [`regex::bytes::NoExpand`] so that `$1` / `$name` sequences inside it are
/// not interpreted as capture-group references. This matters because the
/// caller subsequently searches the returned line for the literal
/// `new_delimiter` bytes.
///
/// The result is a `Cow`, so (per the `regex` crate documentation) no
/// allocation takes place when `re` does not match at all.
#[cfg(feature = "regex")]
fn compress_delimiter_with_regex<'a>(
    line: &'a [u8],
    re: &Regex,
    new_delimiter: &[u8],
) -> std::borrow::Cow<'a, [u8]> {
    re.replace_all(line, regex::bytes::NoExpand(new_delimiter))
}
44
45fn trim<'a>(buffer: &'a [u8], trim_kind: &Trim, delimiter: &[u8]) -> &'a [u8] {
46    match trim_kind {
47        Trim::Both => {
48            let mut idx = 0;
49            let mut r_idx = buffer.len();
50
51            while buffer[idx..].starts_with(delimiter) {
52                idx += delimiter.len();
53            }
54
55            while buffer[idx..r_idx].ends_with(delimiter) {
56                r_idx -= delimiter.len();
57            }
58
59            &buffer[idx..r_idx]
60        }
61        Trim::Left => {
62            let mut idx = 0;
63
64            while buffer[idx..].starts_with(delimiter) {
65                idx += delimiter.len();
66            }
67
68            &buffer[idx..]
69        }
70        Trim::Right => {
71            let mut r_idx = buffer.len();
72
73            while buffer[..r_idx].ends_with(delimiter) {
74                r_idx -= delimiter.len();
75            }
76
77            &buffer[..r_idx]
78        }
79    }
80}
81
/// Strips one regex match from the start and/or the end of `line`, depending
/// on `trim_kind`, and returns the remaining sub-slice.
///
/// Only a match that begins at offset 0 is removed from the left, and only a
/// match that ends exactly at `line.len()` is removed from the right. Unlike
/// `trim`, a single match is removed per side, so callers that want to drop a
/// whole run of delimiters must pass a regex that matches the run.
#[cfg(feature = "regex")]
fn trim_regex<'a>(line: &'a [u8], trim_kind: &Trim, re: &Regex) -> &'a [u8] {
    let trim_left = matches!(trim_kind, Trim::Both | Trim::Left);
    let trim_right = matches!(trim_kind, Trim::Both | Trim::Right);

    let mut matches = re.find_iter(line);
    let mut start = 0;
    let mut end = line.len();

    // The first match is only pulled out of the iterator when the left side
    // must be inspected; it is remembered because it may also be the last one.
    let first = if trim_left { matches.next() } else { None };

    if let Some(m) = first.filter(|m| m.start() == 0) {
        start = m.end();
    }

    if trim_right {
        // `matches` no longer yields `first`: when nothing follows it, `first`
        // itself is the last match of the line and must still be considered.
        let last = matches.last().or(first);

        // `m.start() >= start` rejects the match already removed from the left
        // (e.g. a line made only of delimiters), keeping `start <= end`.
        if let Some(m) = last.filter(|m| m.end() == line.len() && m.start() >= start) {
            end = m.start();
        }
    }

    &line[start..end]
}
104
/// Writes `$to_print` (a byte slice) to `$writer`, either verbatim or, when
/// `$as_json` is true, encoded as a JSON string literal.
///
/// Must be expanded inside a function or closure returning a `Result`, since
/// both the JSON encoding and the write are propagated with `?`.
macro_rules! write_maybe_as_json {
    ($writer:ident, $to_print:ident, $as_json:expr) => {{
        if $as_json {
            // The bytes may not be valid UTF-8 (binary input, or a cut that
            // falls in the middle of a codepoint). `from_utf8_lossy` borrows
            // valid input as-is and substitutes U+FFFD for invalid sequences,
            // so no `unsafe` unchecked conversion is needed.
            let text = String::from_utf8_lossy(&$to_print);
            let encoded = serde_json::to_string(&*text)?;
            $writer.write_all(encoded.as_bytes())?;
        } else {
            $writer.write_all(&$to_print)?;
        }
    }};
}
120
121pub fn cut_str<W: Write, F, R>(
122    line: &[u8],
123    opt: &Opt,
124    stdout: &mut W,
125    compressed_line_buf: &mut Vec<u8>,
126    eol: &[u8],
127    plan: &mut FieldPlan<F, R>,
128) -> Result<()>
129where
130    F: DelimiterFinder,
131    R: DelimiterFinder,
132{
133    let mut line = line;
134
135    if let Some(trim_kind) = opt.trim {
136        if opt.regex_bag.is_some() {
137            #[cfg(feature = "regex")]
138            {
139                line = trim_regex(line, &trim_kind, &opt.regex_bag.as_ref().unwrap().greedy);
140            }
141        } else {
142            line = trim(line, &trim_kind, &opt.delimiter);
143        }
144    }
145
146    if line.is_empty() {
147        if !opt.only_delimited {
148            stdout.write_all(eol)?;
149        }
150        return Ok(());
151    }
152
153    #[cfg(feature = "regex")]
154    let line_holder: std::borrow::Cow<[u8]>;
155
156    if opt.compress_delimiter {
157        if opt.regex_bag.is_some() && cfg!(feature = "regex") {
158            #[cfg(feature = "regex")]
159            {
160                let delimiter = opt.replace_delimiter.as_ref().unwrap(); // we checked earlier the invariant
161                line_holder = compress_delimiter_with_regex(
162                    line,
163                    &opt.regex_bag.as_ref().unwrap().greedy,
164                    delimiter,
165                );
166                line = &line_holder;
167            }
168        } else {
169            compress_delimiter(line, &opt.delimiter, compressed_line_buf);
170            line = compressed_line_buf;
171        }
172    }
173
174    let maybe_maybe_num_fields = (plan.extract_func)(line, plan);
175    let maybe_num_fields = maybe_maybe_num_fields.unwrap_or(None);
176
177    if opt.only_delimited
178        && maybe_num_fields
179            .expect("We didn't use an extract function that counted the number of fields")
180            == 1
181    {
182        // If there's only 1 field it means that there were no delimiters
183        // and when used alogside `only_delimited` we must skip the line
184        return Ok(());
185    }
186
187    if opt.json {
188        stdout.write_all(b"[")?;
189    }
190
191    let mut _bounds: UserBoundsList;
192    let mut bounds = &opt.bounds;
193
194    if opt.complement {
195        _bounds =
196            bounds
197                .complement(maybe_num_fields.expect(
198                    "We didn't use an extract function that counted the number of fields",
199                ))?;
200        bounds = &_bounds;
201
202        if bounds.is_empty() {
203            // If the original bounds matched all the fields, the complement is empty
204            if !opt.only_delimited {
205                stdout.write_all(eol)?;
206            }
207            return Ok(());
208        }
209    }
210
211    if opt.unpack {
212        // Unpack bounds such as 1:3 or 2: into single-field bounds
213        // such as 1:1,2:2,3:3 etc...
214
215        // Start by checking if we actually need to rewrite the bounds
216        // (are there ranges in the first place?), since it's an
217        // expensive operation.
218        if bounds.iter().any(|bof| match bof {
219            BoundOrFiller::Bound(b) => b.l() != b.r(),
220            BoundOrFiller::Filler(_) => false,
221        }) {
222            _bounds = bounds.unpack(
223                maybe_num_fields
224                    .expect("We didn't use an extract function that counted the number of fields"),
225            );
226            bounds = &_bounds;
227        }
228    }
229
230    bounds.iter().try_for_each(|bof| -> Result<()> {
231        let b = match bof {
232            BoundOrFiller::Filler(f) => {
233                stdout.write_all(f.as_bytes())?;
234                return Ok(());
235            }
236            BoundOrFiller::Bound(b) => b,
237        };
238
239        let field = plan.get_field(b, line.len());
240        let output = if let Ok(field) = field {
241            &line[field.start..field.end]
242        } else if b.fallback_oob().is_some() {
243            b.fallback_oob().as_ref().unwrap()
244        } else if let Some(generic_fallback) = &opt.fallback_oob {
245            generic_fallback
246        } else {
247            return Err(field.unwrap_err());
248        };
249
250        let mut field_to_print = output;
251        let output_with_delimiter_replaced;
252
253        if let Some(replace_func) = opt.replace_delimiter_fn {
254            output_with_delimiter_replaced = replace_func(output, opt);
255            field_to_print = &output_with_delimiter_replaced;
256        }
257
258        write_maybe_as_json!(stdout, field_to_print, opt.json);
259
260        if opt.join && !b.is_last() {
261            stdout.write_all(
262                opt.replace_delimiter
263                    .as_ref()
264                    .unwrap_or(&opt.delimiter)
265                    .as_bytes(),
266            )?;
267        }
268
269        Ok(())
270    })?;
271
272    if opt.json {
273        stdout.write_all(b"]")?;
274    }
275
276    stdout.write_all(eol)?;
277
278    Ok(())
279}
280
281pub fn read_and_cut_str<B: BufRead, W: Write>(
282    stdin: &mut B,
283    stdout: &mut W,
284    opt: &Opt,
285) -> Result<()> {
286    let line_buf: Vec<u8> = Vec::with_capacity(1024);
287    let mut compressed_line_buf = if opt.compress_delimiter {
288        Vec::with_capacity(line_buf.capacity())
289    } else {
290        Vec::new()
291    };
292
293    // Determine which plan type to use based on options
294    let should_compress_delimiter = opt.compress_delimiter
295        && (opt.bounds_type == BoundsType::Fields || opt.bounds_type == BoundsType::Lines);
296
297    #[cfg(feature = "regex")]
298    let maybe_regex = opt.regex_bag.as_ref().map(|x| {
299        if opt.greedy_delimiter {
300            &x.greedy
301        } else {
302            &x.normal
303        }
304    });
305    #[cfg(not(feature = "regex"))]
306    let maybe_regex: Option<()> = None;
307
308    if should_compress_delimiter && maybe_regex.is_some() && opt.replace_delimiter.is_some() {
309        // Special case: compressed delimiter + regex + delimiter replacement.
310        // We setup now the search plan, taking into account that when we start searching
311        // for the delimiter it will have been already replaced (so we won't use
312        // the regex to search for the original delimiter, we will do a fixed-string search
313        // for the new delimiter).
314        let replace_delimiter = opt.replace_delimiter.as_ref().unwrap();
315        let mut plan = FieldPlan::from_opt_fixed_with_custom_delimiter(opt, replace_delimiter)?;
316
317        process_lines_with_plan(stdin, stdout, opt, &mut compressed_line_buf, &mut plan)
318    } else if maybe_regex.is_some() {
319        #[cfg(feature = "regex")]
320        {
321            let regex = maybe_regex.unwrap();
322            let trim_empty = opt.bounds_type == BoundsType::Characters;
323            let mut plan = FieldPlan::from_opt_regex(opt, regex.clone(), trim_empty)?;
324            process_lines_with_plan(stdin, stdout, opt, &mut compressed_line_buf, &mut plan)
325        }
326        #[cfg(not(feature = "regex"))]
327        {
328            unreachable!()
329        }
330    } else if opt.greedy_delimiter {
331        let mut plan = FieldPlan::from_opt_fixed_greedy(opt)?;
332        process_lines_with_plan(stdin, stdout, opt, &mut compressed_line_buf, &mut plan)
333    } else {
334        // Default memmem case
335        let mut plan = FieldPlan::from_opt_fixed(opt)?;
336        process_lines_with_plan(stdin, stdout, opt, &mut compressed_line_buf, &mut plan)
337    }
338}
339
340// Generic helper function that works with any plan type
341fn process_lines_with_plan<B, W, F, R>(
342    stdin: &mut B,
343    stdout: &mut W,
344    opt: &Opt,
345    compressed_line_buf: &mut Vec<u8>,
346    plan: &mut FieldPlan<F, R>,
347) -> Result<()>
348where
349    B: BufRead,
350    W: Write,
351    F: DelimiterFinder,
352    R: DelimiterFinder,
353{
354    match (opt.read_to_end, opt.eol) {
355        (false, EOL::Newline) => stdin.for_byte_line(|line| {
356            cut_str(
357                line,
358                opt,
359                stdout,
360                compressed_line_buf,
361                &[opt.eol.into()],
362                plan,
363            )
364            .map_err(|x| {
365                x.downcast::<std::io::Error>()
366                    .unwrap_or_else(|e| std::io::Error::other(e.to_string()))
367            })
368            .and(Ok(true))
369        })?,
370        (false, EOL::Zero) => stdin.for_byte_record(opt.eol.into(), |line| {
371            cut_str(
372                line,
373                opt,
374                stdout,
375                compressed_line_buf,
376                &[opt.eol.into()],
377                plan,
378            )
379            .map_err(|x| {
380                x.downcast::<std::io::Error>()
381                    .unwrap_or_else(|e| std::io::Error::other(e.to_string()))
382            })
383            .and(Ok(true))
384        })?,
385        (true, _) => {
386            let mut line: Vec<u8> = Vec::new();
387            stdin.read_to_end(&mut line)?;
388            let line = line.strip_suffix(opt.delimiter.as_slice()).unwrap_or(&line);
389            cut_str(line, opt, stdout, compressed_line_buf, &opt.delimiter, plan)?
390        }
391    }
392    Ok(())
393}
394
395#[cfg(test)]
396mod tests {
397    use crate::{bounds::UserBoundsList, options::EOL};
398
399    #[cfg(feature = "regex")]
400    use crate::options::{RegexBag, Trim};
401
402    use std::{io::Cursor, str::FromStr};
403
404    use super::*;
405
406    fn make_fields_opt() -> Opt {
407        Opt {
408            bounds_type: BoundsType::Fields,
409            delimiter: "-".into(),
410            ..Opt::default()
411        }
412    }
413
414    #[cfg(feature = "regex")]
415    fn make_regex_bag() -> RegexBag {
416        RegexBag {
417            normal: Regex::from_str("[.,]").unwrap(),
418            greedy: Regex::from_str("([.,])+").unwrap(),
419        }
420    }
421
422    #[cfg(feature = "regex")]
423    fn make_cut_characters_regex_bag() -> RegexBag {
424        RegexBag {
425            normal: Regex::from_str("\\b|\\B").unwrap(),
426            greedy: Regex::from_str("(\\b|\\B)+").unwrap(),
427        }
428    }
429
430    #[test]
431    fn test_read_and_cut_str_echo_non_delimited_strings() {
432        // read_and_cut_str is difficult to test, let's verify at least
433        // that it reads the input and appears to call cut_str
434
435        let opt = make_fields_opt();
436        let mut input = b"foo".as_slice();
437        let mut output = Vec::new();
438        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
439        assert_eq!(output, b"foo\n".as_slice());
440    }
441
442    #[test]
443    fn test_read_and_cut_str_echo_non_delimited_strings_with_eol_zero() {
444        // read_and_cut_str is difficult to test, let's verify at least
445        // that it reads the input and appears to call cut_str
446
447        let mut opt = make_fields_opt();
448        opt.eol = EOL::Zero;
449        let mut input = b"foo".as_slice();
450        let mut output = Vec::new();
451        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
452        assert_eq!(output, b"foo\0".as_slice());
453    }
454
455    fn make_cut_str_buffers() -> (Vec<u8>, Vec<u8>) {
456        let output = Vec::new();
457        let compressed_line_buffer = Vec::new();
458        (output, compressed_line_buffer)
459    }
460
461    #[test]
462    fn read_and_cut_str_echo_non_delimited_strings() {
463        let opt = make_fields_opt();
464
465        let line = b"foo";
466
467        // non-empty line missing the delimiter
468        let (mut output, _) = make_cut_str_buffers();
469        let mut input = Cursor::new(line);
470        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
471        assert_eq!(output, b"foo\n".as_slice());
472
473        // empty line
474        let line = b"";
475        let (mut output, _) = make_cut_str_buffers();
476        let mut input = Cursor::new(line);
477        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
478        assert_eq!(output, b"".as_slice());
479    }
480
481    #[test]
482    fn read_and_cut_str_skip_non_delimited_strings_when_requested() {
483        let mut opt = make_fields_opt();
484
485        opt.only_delimited = true;
486
487        // non-empty line missing the delimiter
488        let line = b"foo";
489        let (mut output, _) = make_cut_str_buffers();
490        let mut input = Cursor::new(line);
491        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
492        assert_eq!(output, b"".as_slice());
493
494        // empty line
495        let line = b"";
496        let (mut output, _) = make_cut_str_buffers();
497        let mut input = Cursor::new(line);
498        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
499        assert_eq!(output, b"".as_slice());
500    }
501
502    #[test]
503    fn read_and_cut_str_it_cut_a_field() {
504        let mut opt = make_fields_opt();
505        let (mut output, _) = make_cut_str_buffers();
506
507        let line = b"a-b-c";
508        opt.bounds = UserBoundsList::from_str("1").unwrap();
509
510        let mut input = Cursor::new(line);
511        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
512        assert_eq!(output, b"a\n".as_slice());
513    }
514
515    #[test]
516    fn read_and_cut_str_it_cut_ranges() {
517        let mut opt = make_fields_opt();
518        let (mut output, _) = make_cut_str_buffers();
519
520        let line = b"a-b-c";
521        opt.bounds = UserBoundsList::from_str("1,1:3").unwrap();
522
523        let mut input = Cursor::new(line);
524        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
525        assert_eq!(output, b"aa-b-c\n".as_slice());
526    }
527
528    #[cfg(feature = "regex")]
529    #[test]
530    fn read_and_cut_str_regex_it_cut_a_field() {
531        let mut opt = make_fields_opt();
532        let (mut output, _) = make_cut_str_buffers();
533
534        let line = b"a.b,c";
535        opt.bounds = UserBoundsList::from_str("1,2,3").unwrap();
536        opt.regex_bag = Some(make_regex_bag());
537
538        let mut input = Cursor::new(line);
539        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
540        assert_eq!(output, b"abc\n".as_slice());
541    }
542
543    #[cfg(feature = "regex")]
544    #[test]
545    fn test_trim_regex_left_match() {
546        let line: &[u8] = b"---a-b---";
547        let trim_kind = Trim::Left;
548        let regex = Regex::new("-+").unwrap();
549        let result = trim_regex(line, &trim_kind, &regex);
550
551        assert_eq!(result, b"a-b---");
552    }
553
554    #[cfg(feature = "regex")]
555    #[test]
556    fn test_trim_regex_left_no_match_risk_wrong_match() {
557        let line: &[u8] = b"a-b---";
558        let trim_kind = Trim::Left;
559        let regex = Regex::new("-+").unwrap();
560        let result = trim_regex(line, &trim_kind, &regex);
561
562        assert_eq!(result, b"a-b---");
563    }
564
565    #[cfg(feature = "regex")]
566    #[test]
567    fn test_trim_regex_left_no_match() {
568        let line: &[u8] = b"abc";
569        let trim_kind = Trim::Left;
570        let regex = Regex::new("-+").unwrap();
571        let result = trim_regex(line, &trim_kind, &regex);
572
573        assert_eq!(result, b"abc");
574    }
575
576    #[cfg(feature = "regex")]
577    #[test]
578    fn test_trim_regex_right() {
579        let line: &[u8] = b"---a-b---";
580        let trim_kind = Trim::Right;
581        let regex = Regex::new("-+").unwrap();
582        let result = trim_regex(line, &trim_kind, &regex);
583
584        assert_eq!(result, b"---a-b");
585    }
586
587    #[cfg(feature = "regex")]
588    #[test]
589    fn test_trim_regex_right_no_match() {
590        let line: &[u8] = b"---a-b";
591        let trim_kind = Trim::Right;
592        let regex = Regex::new("-+").unwrap();
593        let result = trim_regex(line, &trim_kind, &regex);
594
595        assert_eq!(result, b"---a-b");
596    }
597
598    #[cfg(feature = "regex")]
599    #[test]
600    fn test_trim_regex_both() {
601        let line: &[u8] = b"---a-b---";
602        let trim_kind = Trim::Both;
603        let regex = Regex::new("-+").unwrap();
604        let result = trim_regex(line, &trim_kind, &regex);
605
606        assert_eq!(result, b"a-b");
607    }
608
609    #[cfg(feature = "regex")]
610    #[test]
611    fn test_trim_regex_both_no_match() {
612        let line: &[u8] = b"a-b";
613        let trim_kind = Trim::Both;
614        let regex = Regex::new("-+").unwrap();
615        let result = trim_regex(line, &trim_kind, &regex);
616
617        assert_eq!(result, b"a-b");
618    }
619
620    #[test]
621    fn cut_str_it_cut_consecutive_delimiters() {
622        let mut opt = make_fields_opt();
623        let (mut output, _) = make_cut_str_buffers();
624
625        let line = b"a-b-c";
626        opt.bounds = UserBoundsList::from_str("1,3").unwrap();
627
628        let mut input = Cursor::new(line);
629        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
630        assert_eq!(output, b"ac\n".as_slice());
631    }
632
633    #[test]
634    fn cut_str_it_compress_delimiters() {
635        let mut opt = make_fields_opt();
636        opt.bounds = UserBoundsList::from_str("2").unwrap();
637
638        let line = b"--a---b--";
639
640        // first we verify we get an empty string without compressing delimiters
641        let (mut output, _) = make_cut_str_buffers();
642        opt.compress_delimiter = false;
643        let mut input = Cursor::new(line);
644        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
645        assert_eq!(output, b"\n".as_slice());
646
647        // now we do it again while compressing delimiters
648        let (mut output, _) = make_cut_str_buffers();
649        opt.compress_delimiter = true;
650        let mut input = Cursor::new(line);
651        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
652        assert_eq!(output, b"a\n".as_slice());
653
654        // and again but this time requesting a full range
655        let (mut output, _) = make_cut_str_buffers();
656        opt.bounds = UserBoundsList::from_str("1:").unwrap();
657        opt.compress_delimiter = true;
658        let mut input = Cursor::new(line);
659        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
660        assert_eq!(output, b"-a-b-\n".as_slice());
661
662        // let's check with a line that doesn't start/end with delimiters
663        let line = b"a---b";
664        let (mut output, _) = make_cut_str_buffers();
665        opt.bounds = UserBoundsList::from_str("1:").unwrap();
666        opt.compress_delimiter = true;
667        let mut input = Cursor::new(line);
668        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
669        assert_eq!(output, b"a-b\n".as_slice());
670    }
671
672    #[cfg(feature = "regex")]
673    #[test]
674    fn cut_str_regex_it_compress_delimiters() {
675        let mut opt = make_fields_opt();
676
677        let line = b".,a,,,b..c";
678        let (mut output, _) = make_cut_str_buffers();
679        opt.bounds = UserBoundsList::from_str("2,3,4").unwrap();
680        opt.compress_delimiter = true;
681        opt.regex_bag = Some(make_regex_bag());
682        opt.replace_delimiter = Some("-".into());
683
684        let mut input = Cursor::new(line);
685        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
686        assert_eq!(output, b"abc\n".as_slice());
687
688        let line = b".,a,,,b..c";
689        let (mut output, _) = make_cut_str_buffers();
690        opt.bounds = UserBoundsList::from_str("1:").unwrap();
691        opt.compress_delimiter = true;
692        opt.regex_bag = Some(make_regex_bag());
693        opt.replace_delimiter = Some("-".into());
694
695        let mut input = Cursor::new(line);
696        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
697        assert_eq!(output, b"-a-b-c\n".as_slice());
698    }
699
700    #[cfg(feature = "regex")]
701    #[test]
702    fn cut_str_it_cut_characters() {
703        let mut opt = make_fields_opt();
704        let (mut output, _) = make_cut_str_buffers();
705
706        let line = "😁🤩😝😎".as_bytes();
707        opt.bounds = UserBoundsList::from_str("2").unwrap();
708        opt.bounds_type = BoundsType::Characters;
709        opt.regex_bag = Some(make_cut_characters_regex_bag());
710
711        let mut input = Cursor::new(line);
712        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
713        assert_eq!(output, "🤩\n".as_bytes());
714    }
715
716    #[cfg(feature = "regex")]
717    #[test]
718    fn cut_str_it_cut_characters_and_replace_the_delimiter() {
719        let opt: Opt = "-c 1,2,3:4 -r - ".parse().unwrap();
720        let (mut output, _) = make_cut_str_buffers();
721
722        let line = "😁🤩😝😎".as_bytes();
723
724        let mut input = Cursor::new(line);
725        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
726        assert_eq!(&String::from_utf8_lossy(&output), "😁-🤩-😝-😎\n");
727    }
728
729    #[test]
730    fn cut_str_it_supports_zero_terminated_lines() {
731        let mut opt = make_fields_opt();
732        let (mut output, _) = make_cut_str_buffers();
733
734        let line = b"a-b-c";
735        opt.bounds = UserBoundsList::from_str("2").unwrap();
736        opt.eol = EOL::Zero;
737
738        let mut input = Cursor::new(line);
739        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
740        assert_eq!(output, b"b\0".as_slice());
741    }
742
743    #[test]
744    fn cut_str_it_complement_ranges() {
745        let mut opt = make_fields_opt();
746        let (mut output, _) = make_cut_str_buffers();
747
748        let line = b"a-b-c";
749        opt.bounds = UserBoundsList::from_str("2").unwrap();
750        opt.complement = true;
751
752        let mut input = Cursor::new(line);
753        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
754        assert_eq!(output, b"ac\n".as_slice());
755    }
756
757    #[test]
758    fn cut_str_it_join_fields() {
759        let mut opt = make_fields_opt();
760        let (mut output, _) = make_cut_str_buffers();
761
762        let line = b"a-b-c";
763        opt.bounds = UserBoundsList::from_str("1,3").unwrap();
764        opt.join = true;
765
766        let mut input = Cursor::new(line);
767        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
768        assert_eq!(output, b"a-c\n".as_slice());
769    }
770
771    #[test]
772    fn cut_str_it_join_fields_with_a_custom_delimiter() {
773        let mut opt = make_fields_opt();
774        let (mut output, _) = make_cut_str_buffers();
775
776        let line = b"a-b-c";
777        opt.bounds = UserBoundsList::from_str("1,3").unwrap();
778        opt.join = true;
779        opt.replace_delimiter = Some("*".into());
780
781        let mut input = Cursor::new(line);
782        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
783        assert_eq!(output, b"a*c\n".as_slice());
784    }
785
786    #[test]
787    fn cut_str_it_replace_delimiter() {
788        let opt: Opt = "-d - -f 1:3 -r _".parse().unwrap();
789        let (mut output, _) = make_cut_str_buffers();
790
791        let line = b"a-b-c";
792
793        let mut input = Cursor::new(line);
794        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
795        assert_eq!(output, b"a_b_c\n".as_slice());
796    }
797
798    #[cfg(feature = "regex")]
799    #[test]
800    fn cut_str_regex_it_replace_delimiter() {
801        let opt: Opt = "-e [,] -f 1:3 -r _".parse().unwrap();
802        let (mut output, _) = make_cut_str_buffers();
803
804        let line = b"a,b,c";
805
806        let mut input = Cursor::new(line);
807        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
808        assert_eq!(output, b"a_b_c\n".as_slice());
809    }
810
811    #[test]
812    fn cut_str_it_compress_and_replace_delimiter() {
813        let opt: Opt = "-d - -f 1:3 -r _ -p".parse().unwrap();
814        let (mut output, _) = make_cut_str_buffers();
815
816        let line = b"a--b--c";
817
818        let mut input = Cursor::new(line);
819        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
820        assert_eq!(output, b"a_b_c\n".as_slice());
821    }
822
823    #[cfg(feature = "regex")]
824    #[test]
825    fn cut_str_regex_it_compress_and_replace_delimiter() {
826        let opt: Opt = "-e [,] -f 1:3 -r _ -p".parse().unwrap();
827        let (mut output, _) = make_cut_str_buffers();
828
829        let line = b"a,,b,,c";
830
831        let mut input = Cursor::new(line);
832        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
833        assert_eq!(output, b"a_b_c\n".as_slice());
834    }
835
836    #[cfg(feature = "regex")]
837    #[test]
838    fn cut_str_regex_it_join_fields_with_a_custom_delimiter() {
839        let mut opt = make_fields_opt();
840        let (mut output, _) = make_cut_str_buffers();
841
842        let line = b"a.b,c";
843        opt.bounds = UserBoundsList::from_str("1,3").unwrap();
844        opt.delimiter = "[.,]".into();
845        opt.regex_bag = Some(make_regex_bag());
846        opt.join = true;
847        opt.replace_delimiter = Some("<->".into());
848
849        let mut input = Cursor::new(line);
850        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
851        assert_eq!(output, b"a<->c\n".as_slice());
852    }
853
854    #[test]
855    fn cut_str_it_format_fields() {
856        let mut opt = make_fields_opt();
857        let (mut output, _) = make_cut_str_buffers();
858
859        let line = b"a-b-c";
860        opt.bounds = UserBoundsList::from_str("{1} < {3} > {2}").unwrap();
861
862        let mut input = Cursor::new(line);
863        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
864        assert_eq!(output, b"a < c > b\n".as_slice());
865    }
866
867    #[test]
868    fn cut_str_supports_greedy_delimiter() {
869        let mut opt = make_fields_opt();
870        let (mut output, _) = make_cut_str_buffers();
871
872        let line = b"a---b---c";
873        opt.bounds = UserBoundsList::from_str("2").unwrap();
874        opt.greedy_delimiter = true;
875
876        let mut input = Cursor::new(line);
877        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
878        assert_eq!(output, b"b\n".as_slice());
879
880        // check that, opposite to compress_delimiter, the delimiter is kept long
881        let mut opt = make_fields_opt();
882        let (mut output, _) = make_cut_str_buffers();
883
884        let line = b"a---b---c";
885        opt.bounds = UserBoundsList::from_str("2:3").unwrap();
886        opt.greedy_delimiter = true;
887
888        let mut input = Cursor::new(line);
889        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
890        assert_eq!(output, b"b---c\n".as_slice());
891    }
892
893    #[cfg(feature = "regex")]
894    #[test]
895    fn cut_str_regex_supports_greedy_delimiter() {
896        // also check that, contrary to compress_delimiter, the delimiter is kept long
897        let mut opt = make_fields_opt();
898        let (mut output, _) = make_cut_str_buffers();
899
900        let line = b"a,,.,b..,,c";
901        opt.bounds = UserBoundsList::from_str("2:3").unwrap();
902
903        opt.greedy_delimiter = true;
904        opt.delimiter = "[.,]".into();
905        opt.regex_bag = Some(make_regex_bag());
906
907        let mut input = Cursor::new(line);
908        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
909        assert_eq!(output, b"b..,,c\n".as_slice());
910    }
911
912    #[test]
913    fn cut_str_it_trim_fields() {
914        let mut opt = make_fields_opt();
915        let line = b"--a--b--c--";
916
917        // check Trim::Both
918        opt.trim = Some(Trim::Both);
919        opt.bounds = UserBoundsList::from_str("1,3,-1").unwrap();
920
921        let (mut output, _) = make_cut_str_buffers();
922        let mut input = Cursor::new(line);
923        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
924        assert_eq!(output, b"abc\n".as_slice());
925
926        // check Trim::Left
927        opt.trim = Some(Trim::Left);
928        opt.bounds = UserBoundsList::from_str("1,3,-3").unwrap();
929
930        let (mut output, _) = make_cut_str_buffers();
931        let mut input = Cursor::new(line);
932        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
933        assert_eq!(output, b"abc\n".as_slice());
934
935        // check Trim::Right
936        opt.trim = Some(Trim::Right);
937        opt.bounds = UserBoundsList::from_str("3,5,-1").unwrap();
938
939        let (mut output, _) = make_cut_str_buffers();
940        let mut input = Cursor::new(line);
941        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
942        assert_eq!(output, b"abc\n".as_slice());
943    }
944
945    #[cfg(feature = "regex")]
946    #[test]
947    fn cut_str_regex_it_trim_fields() {
948        let mut opt = make_fields_opt();
949        let line = b"..a,.b..c,,";
950
951        opt.delimiter = "[.,]".into();
952        opt.regex_bag = Some(make_regex_bag());
953
954        // check Trim::Both
955        opt.trim = Some(Trim::Both);
956        opt.bounds = UserBoundsList::from_str("1,3,-1").unwrap();
957
958        let (mut output, _) = make_cut_str_buffers();
959        let mut input = Cursor::new(line);
960        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
961        assert_eq!(output, b"abc\n".as_slice());
962
963        // check Trim::Left
964        opt.trim = Some(Trim::Left);
965        opt.bounds = UserBoundsList::from_str("1,3,-3").unwrap();
966
967        let (mut output, _) = make_cut_str_buffers();
968        let mut input = Cursor::new(line);
969        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
970        assert_eq!(output, b"abc\n".as_slice());
971
972        // check Trim::Right
973        opt.trim = Some(Trim::Right);
974        opt.bounds = UserBoundsList::from_str("3,5,-1").unwrap();
975
976        let (mut output, _) = make_cut_str_buffers();
977        let mut input = Cursor::new(line);
978        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
979        assert_eq!(output, b"abc\n".as_slice());
980    }
981
982    #[test]
983    fn cut_str_it_produce_json_output() {
984        let mut opt = make_fields_opt();
985        opt.json = true;
986        opt.replace_delimiter = Some(",".into());
987        let (mut output, _) = make_cut_str_buffers();
988
989        let line = b"a-b-c";
990        opt.bounds = UserBoundsList::from_str("1,3").unwrap();
991        opt.join = true;
992
993        let mut input = Cursor::new(line);
994        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
995        assert_eq!(
996            output,
997            br#"["a","c"]
998"#
999            .as_slice()
1000        );
1001    }
1002
1003    #[test]
1004    fn cut_str_json_with_single_field_is_still_an_array() {
1005        let mut opt = make_fields_opt();
1006        opt.json = true;
1007        opt.replace_delimiter = Some(",".into());
1008        let (mut output, _) = make_cut_str_buffers();
1009
1010        let line = b"a-b-c";
1011        opt.bounds = UserBoundsList::from_str("1").unwrap();
1012        opt.join = true;
1013
1014        let mut input = Cursor::new(line);
1015        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
1016        assert_eq!(
1017            output,
1018            br#"["a"]
1019"#
1020            .as_slice()
1021        );
1022    }
1023
1024    #[test]
1025    fn cut_str_complement_works_with_json() {
1026        let opt: Opt = "-d - -f 2,2:3,-1 -j --json --complement".parse().unwrap();
1027        let (mut output, _) = make_cut_str_buffers();
1028
1029        let line = b"a-b-c";
1030
1031        let mut input = Cursor::new(line);
1032        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
1033        assert_eq!(
1034            output,
1035            br#"["a","c","a","a","b"]
1036"#
1037            .as_slice()
1038        );
1039    }
1040
1041    #[cfg(feature = "regex")]
1042    #[test]
1043    fn cut_str_json_on_characters_works() {
1044        let opt: Opt = "-c 1,2,3:4 --json".parse().unwrap();
1045        let (mut output, _) = make_cut_str_buffers();
1046
1047        let line = "😁🤩😝😎".as_bytes();
1048
1049        let mut input = Cursor::new(line);
1050        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
1051        assert_eq!(
1052            &String::from_utf8_lossy(&output),
1053            r#"["😁","🤩","😝","😎"]
1054"#
1055        );
1056    }
1057
1058    #[test]
1059    fn test_cut_bytes_stream_cut_simplest_field_with_eol_and_fallbacks() {
1060        let mut opt = make_fields_opt();
1061        let (mut output, _) = make_cut_str_buffers();
1062
1063        let line = b"a";
1064        opt.fallback_oob = Some(b"generic fallback".to_vec());
1065        opt.bounds = UserBoundsList::from_str("{1}-fill-{2}-more fill-{3=last fill}").unwrap();
1066
1067        let mut input = Cursor::new(line);
1068        read_and_cut_str(&mut input, &mut output, &opt).unwrap();
1069
1070        assert_eq!(
1071            &String::from_utf8_lossy(&output),
1072            "a-fill-generic fallback-more fill-last fill\n"
1073        );
1074    }
1075}