1use anyhow::Result;
2use bstr::ByteSlice;
3use bstr::io::BufReadExt;
4use std::io::{BufRead, Write};
5
6use crate::bounds::{BoundOrFiller, BoundsType, UserBoundsList};
7use crate::finders::common::DelimiterFinder;
8use crate::options::{EOL, Opt, Trim};
9use crate::plan::FieldPlan;
10
11#[cfg(feature = "regex")]
12use regex::bytes::Regex;
13
14fn compress_delimiter(line: &[u8], delimiter: &[u8], output: &mut Vec<u8>) {
15 output.clear();
16 let mut prev_idx = 0;
17
18 for idx in line.find_iter(delimiter) {
19 let prev_part = &line[prev_idx..idx];
20
21 if idx == 0 {
22 output.extend(delimiter);
23 } else if !prev_part.is_empty() {
24 output.extend(prev_part);
25 output.extend(delimiter);
26 }
27
28 prev_idx = idx + delimiter.len();
29 }
30
31 if prev_idx < line.len() {
32 output.extend(&line[prev_idx..]);
33 }
34}
35
/// Replaces every match of `re` in `line` with the literal bytes of
/// `new_delimiter`.
///
/// The replacement is wrapped in `NoExpand` so that a delimiter containing
/// `$` (e.g. `$1`, `$name`) is inserted verbatim instead of being interpreted
/// as a capture-group reference; callers go on to split the result on that
/// exact delimiter.
///
/// Returns the `Cow` produced by `Regex::replace_all`.
#[cfg(feature = "regex")]
fn compress_delimiter_with_regex<'a>(
    line: &'a [u8],
    re: &Regex,
    new_delimiter: &[u8],
) -> std::borrow::Cow<'a, [u8]> {
    re.replace_all(line, regex::bytes::NoExpand(new_delimiter))
}
44
45fn trim<'a>(buffer: &'a [u8], trim_kind: &Trim, delimiter: &[u8]) -> &'a [u8] {
46 match trim_kind {
47 Trim::Both => {
48 let mut idx = 0;
49 let mut r_idx = buffer.len();
50
51 while buffer[idx..].starts_with(delimiter) {
52 idx += delimiter.len();
53 }
54
55 while buffer[idx..r_idx].ends_with(delimiter) {
56 r_idx -= delimiter.len();
57 }
58
59 &buffer[idx..r_idx]
60 }
61 Trim::Left => {
62 let mut idx = 0;
63
64 while buffer[idx..].starts_with(delimiter) {
65 idx += delimiter.len();
66 }
67
68 &buffer[idx..]
69 }
70 Trim::Right => {
71 let mut r_idx = buffer.len();
72
73 while buffer[..r_idx].ends_with(delimiter) {
74 r_idx -= delimiter.len();
75 }
76
77 &buffer[..r_idx]
78 }
79 }
80}
81
/// Strips a delimiter match from the start and/or end of `line`.
///
/// At most one match of `re` is removed per side: the first match when it
/// starts at offset 0 (`Trim::Left` / `Trim::Both`), and the last match when
/// it ends exactly at `line.len()` (`Trim::Right` / `Trim::Both`). Pass a
/// regex that matches a whole run of delimiters to strip the whole run.
///
/// Returns a sub-slice of `line`; nothing is copied.
#[cfg(feature = "regex")]
fn trim_regex<'a>(line: &'a [u8], trim_kind: &Trim, re: &Regex) -> &'a [u8] {
    let trim_left = matches!(trim_kind, Trim::Both | Trim::Left);
    let trim_right = matches!(trim_kind, Trim::Both | Trim::Right);

    let mut matches = re.find_iter(line);
    let first = matches.next();

    let idx_start = match first {
        Some(m) if trim_left && m.start() == 0 => m.end(),
        _ => 0,
    };

    let idx_end = if trim_right {
        // The last match of the line is the last one after `first`, or `first`
        // itself when nothing follows it. Falling back to `first` matters for
        // `Trim::Both`: a line whose only match is the trailing one must still
        // be trimmed on the right.
        match matches.last().or(first) {
            // `m.start() >= idx_start` keeps a match already used for the left
            // side from being applied a second time.
            Some(m) if m.end() == line.len() && m.start() >= idx_start => m.start(),
            _ => line.len(),
        }
    } else {
        line.len()
    };

    &line[idx_start..idx_end]
}
104
/// Writes the bytes in `$to_print` to `$writer`: encoded as a JSON string
/// when `$as_json` is true, verbatim otherwise.
///
/// The expansion uses `?`, so it must sit inside a function or closure whose
/// error type accepts both the writer's error and `serde_json::Error`.
macro_rules! write_maybe_as_json {
    ($writer:ident, $to_print:ident, $as_json:expr) => {{
        if $as_json {
            // The input is arbitrary bytes, so it is decoded lossily instead of
            // with `from_utf8_unchecked`: valid UTF-8 is borrowed unchanged,
            // invalid sequences become U+FFFD rather than undefined behaviour.
            let text = String::from_utf8_lossy(&$to_print);
            let encoded = serde_json::to_string(&*text)?;
            $writer.write_all(encoded.as_bytes())?;
        } else {
            $writer.write_all(&$to_print)?;
        }
    }};
}
120
121pub fn cut_str<W: Write, F, R>(
122 line: &[u8],
123 opt: &Opt,
124 stdout: &mut W,
125 compressed_line_buf: &mut Vec<u8>,
126 eol: &[u8],
127 plan: &mut FieldPlan<F, R>,
128) -> Result<()>
129where
130 F: DelimiterFinder,
131 R: DelimiterFinder,
132{
133 let mut line = line;
134
135 if let Some(trim_kind) = opt.trim {
136 if opt.regex_bag.is_some() {
137 #[cfg(feature = "regex")]
138 {
139 line = trim_regex(line, &trim_kind, &opt.regex_bag.as_ref().unwrap().greedy);
140 }
141 } else {
142 line = trim(line, &trim_kind, &opt.delimiter);
143 }
144 }
145
146 if line.is_empty() {
147 if !opt.only_delimited {
148 stdout.write_all(eol)?;
149 }
150 return Ok(());
151 }
152
153 #[cfg(feature = "regex")]
154 let line_holder: std::borrow::Cow<[u8]>;
155
156 if opt.compress_delimiter {
157 if opt.regex_bag.is_some() && cfg!(feature = "regex") {
158 #[cfg(feature = "regex")]
159 {
160 let delimiter = opt.replace_delimiter.as_ref().unwrap(); line_holder = compress_delimiter_with_regex(
162 line,
163 &opt.regex_bag.as_ref().unwrap().greedy,
164 delimiter,
165 );
166 line = &line_holder;
167 }
168 } else {
169 compress_delimiter(line, &opt.delimiter, compressed_line_buf);
170 line = compressed_line_buf;
171 }
172 }
173
174 let maybe_maybe_num_fields = (plan.extract_func)(line, plan);
175 let maybe_num_fields = maybe_maybe_num_fields.unwrap_or(None);
176
177 if opt.only_delimited
178 && maybe_num_fields
179 .expect("We didn't use an extract function that counted the number of fields")
180 == 1
181 {
182 return Ok(());
185 }
186
187 if opt.json {
188 stdout.write_all(b"[")?;
189 }
190
191 let mut _bounds: UserBoundsList;
192 let mut bounds = &opt.bounds;
193
194 if opt.complement {
195 _bounds =
196 bounds
197 .complement(maybe_num_fields.expect(
198 "We didn't use an extract function that counted the number of fields",
199 ))?;
200 bounds = &_bounds;
201
202 if bounds.is_empty() {
203 if !opt.only_delimited {
205 stdout.write_all(eol)?;
206 }
207 return Ok(());
208 }
209 }
210
211 if opt.unpack {
212 if bounds.iter().any(|bof| match bof {
219 BoundOrFiller::Bound(b) => b.l() != b.r(),
220 BoundOrFiller::Filler(_) => false,
221 }) {
222 _bounds = bounds.unpack(
223 maybe_num_fields
224 .expect("We didn't use an extract function that counted the number of fields"),
225 );
226 bounds = &_bounds;
227 }
228 }
229
230 bounds.iter().try_for_each(|bof| -> Result<()> {
231 let b = match bof {
232 BoundOrFiller::Filler(f) => {
233 stdout.write_all(f.as_bytes())?;
234 return Ok(());
235 }
236 BoundOrFiller::Bound(b) => b,
237 };
238
239 let field = plan.get_field(b, line.len());
240 let output = if let Ok(field) = field {
241 &line[field.start..field.end]
242 } else if b.fallback_oob().is_some() {
243 b.fallback_oob().as_ref().unwrap()
244 } else if let Some(generic_fallback) = &opt.fallback_oob {
245 generic_fallback
246 } else {
247 return Err(field.unwrap_err());
248 };
249
250 let mut field_to_print = output;
251 let output_with_delimiter_replaced;
252
253 if let Some(replace_func) = opt.replace_delimiter_fn {
254 output_with_delimiter_replaced = replace_func(output, opt);
255 field_to_print = &output_with_delimiter_replaced;
256 }
257
258 write_maybe_as_json!(stdout, field_to_print, opt.json);
259
260 if opt.join && !b.is_last() {
261 stdout.write_all(
262 opt.replace_delimiter
263 .as_ref()
264 .unwrap_or(&opt.delimiter)
265 .as_bytes(),
266 )?;
267 }
268
269 Ok(())
270 })?;
271
272 if opt.json {
273 stdout.write_all(b"]")?;
274 }
275
276 stdout.write_all(eol)?;
277
278 Ok(())
279}
280
281pub fn read_and_cut_str<B: BufRead, W: Write>(
282 stdin: &mut B,
283 stdout: &mut W,
284 opt: &Opt,
285) -> Result<()> {
286 let line_buf: Vec<u8> = Vec::with_capacity(1024);
287 let mut compressed_line_buf = if opt.compress_delimiter {
288 Vec::with_capacity(line_buf.capacity())
289 } else {
290 Vec::new()
291 };
292
293 let should_compress_delimiter = opt.compress_delimiter
295 && (opt.bounds_type == BoundsType::Fields || opt.bounds_type == BoundsType::Lines);
296
297 #[cfg(feature = "regex")]
298 let maybe_regex = opt.regex_bag.as_ref().map(|x| {
299 if opt.greedy_delimiter {
300 &x.greedy
301 } else {
302 &x.normal
303 }
304 });
305 #[cfg(not(feature = "regex"))]
306 let maybe_regex: Option<()> = None;
307
308 if should_compress_delimiter && maybe_regex.is_some() && opt.replace_delimiter.is_some() {
309 let replace_delimiter = opt.replace_delimiter.as_ref().unwrap();
315 let mut plan = FieldPlan::from_opt_fixed_with_custom_delimiter(opt, replace_delimiter)?;
316
317 process_lines_with_plan(stdin, stdout, opt, &mut compressed_line_buf, &mut plan)
318 } else if maybe_regex.is_some() {
319 #[cfg(feature = "regex")]
320 {
321 let regex = maybe_regex.unwrap();
322 let trim_empty = opt.bounds_type == BoundsType::Characters;
323 let mut plan = FieldPlan::from_opt_regex(opt, regex.clone(), trim_empty)?;
324 process_lines_with_plan(stdin, stdout, opt, &mut compressed_line_buf, &mut plan)
325 }
326 #[cfg(not(feature = "regex"))]
327 {
328 unreachable!()
329 }
330 } else if opt.greedy_delimiter {
331 let mut plan = FieldPlan::from_opt_fixed_greedy(opt)?;
332 process_lines_with_plan(stdin, stdout, opt, &mut compressed_line_buf, &mut plan)
333 } else {
334 let mut plan = FieldPlan::from_opt_fixed(opt)?;
336 process_lines_with_plan(stdin, stdout, opt, &mut compressed_line_buf, &mut plan)
337 }
338}
339
340fn process_lines_with_plan<B, W, F, R>(
342 stdin: &mut B,
343 stdout: &mut W,
344 opt: &Opt,
345 compressed_line_buf: &mut Vec<u8>,
346 plan: &mut FieldPlan<F, R>,
347) -> Result<()>
348where
349 B: BufRead,
350 W: Write,
351 F: DelimiterFinder,
352 R: DelimiterFinder,
353{
354 match (opt.read_to_end, opt.eol) {
355 (false, EOL::Newline) => stdin.for_byte_line(|line| {
356 cut_str(
357 line,
358 opt,
359 stdout,
360 compressed_line_buf,
361 &[opt.eol.into()],
362 plan,
363 )
364 .map_err(|x| {
365 x.downcast::<std::io::Error>()
366 .unwrap_or_else(|e| std::io::Error::other(e.to_string()))
367 })
368 .and(Ok(true))
369 })?,
370 (false, EOL::Zero) => stdin.for_byte_record(opt.eol.into(), |line| {
371 cut_str(
372 line,
373 opt,
374 stdout,
375 compressed_line_buf,
376 &[opt.eol.into()],
377 plan,
378 )
379 .map_err(|x| {
380 x.downcast::<std::io::Error>()
381 .unwrap_or_else(|e| std::io::Error::other(e.to_string()))
382 })
383 .and(Ok(true))
384 })?,
385 (true, _) => {
386 let mut line: Vec<u8> = Vec::new();
387 stdin.read_to_end(&mut line)?;
388 let line = line.strip_suffix(opt.delimiter.as_slice()).unwrap_or(&line);
389 cut_str(line, opt, stdout, compressed_line_buf, &opt.delimiter, plan)?
390 }
391 }
392 Ok(())
393}
394
#[cfg(test)]
mod tests {
    // Most tests drive `read_and_cut_str` end to end through the `cut` helper;
    // the `test_trim_regex_*` tests exercise `trim_regex` directly.

    use crate::{bounds::UserBoundsList, options::EOL};

    #[cfg(feature = "regex")]
    use crate::options::{RegexBag, Trim};

    use std::{io::Cursor, str::FromStr};

    use super::*;

    /// Options for cutting fields delimited by `-`.
    fn make_fields_opt() -> Opt {
        Opt {
            bounds_type: BoundsType::Fields,
            delimiter: "-".into(),
            ..Opt::default()
        }
    }

    /// Regex pair matching `.` or `,` as the delimiter.
    #[cfg(feature = "regex")]
    fn make_regex_bag() -> RegexBag {
        RegexBag {
            normal: Regex::from_str("[.,]").unwrap(),
            greedy: Regex::from_str("([.,])+").unwrap(),
        }
    }

    /// Regex pair used when cutting by characters.
    #[cfg(feature = "regex")]
    fn make_cut_characters_regex_bag() -> RegexBag {
        RegexBag {
            normal: Regex::from_str("\\b|\\B").unwrap(),
            greedy: Regex::from_str("(\\b|\\B)+").unwrap(),
        }
    }

    /// Runs `read_and_cut_str` over `input` and returns everything it wrote.
    fn cut(opt: &Opt, input: &[u8]) -> Vec<u8> {
        let mut output = Vec::new();
        read_and_cut_str(&mut Cursor::new(input), &mut output, opt).unwrap();
        output
    }

    #[test]
    fn test_read_and_cut_str_echo_non_delimited_strings() {
        let opt = make_fields_opt();

        assert_eq!(cut(&opt, b"foo"), b"foo\n".as_slice());
    }

    #[test]
    fn test_read_and_cut_str_echo_non_delimited_strings_with_eol_zero() {
        let mut opt = make_fields_opt();
        opt.eol = EOL::Zero;

        assert_eq!(cut(&opt, b"foo"), b"foo\0".as_slice());
    }

    #[test]
    fn read_and_cut_str_echo_non_delimited_strings() {
        let opt = make_fields_opt();

        assert_eq!(cut(&opt, b"foo"), b"foo\n".as_slice());
        assert_eq!(cut(&opt, b""), b"".as_slice());
    }

    #[test]
    fn read_and_cut_str_skip_non_delimited_strings_when_requested() {
        let mut opt = make_fields_opt();
        opt.only_delimited = true;

        assert_eq!(cut(&opt, b"foo"), b"".as_slice());
        assert_eq!(cut(&opt, b""), b"".as_slice());
    }

    #[test]
    fn read_and_cut_str_it_cut_a_field() {
        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("1").unwrap();

        assert_eq!(cut(&opt, b"a-b-c"), b"a\n".as_slice());
    }

    #[test]
    fn read_and_cut_str_it_cut_ranges() {
        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("1,1:3").unwrap();

        assert_eq!(cut(&opt, b"a-b-c"), b"aa-b-c\n".as_slice());
    }

    #[cfg(feature = "regex")]
    #[test]
    fn read_and_cut_str_regex_it_cut_a_field() {
        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("1,2,3").unwrap();
        opt.regex_bag = Some(make_regex_bag());

        assert_eq!(cut(&opt, b"a.b,c"), b"abc\n".as_slice());
    }

    #[cfg(feature = "regex")]
    #[test]
    fn test_trim_regex_left_match() {
        let line: &[u8] = b"---a-b---";
        let trim_kind = Trim::Left;
        let regex = Regex::new("-+").unwrap();
        let result = trim_regex(line, &trim_kind, &regex);

        assert_eq!(result, b"a-b---");
    }

    #[cfg(feature = "regex")]
    #[test]
    fn test_trim_regex_left_no_match_risk_wrong_match() {
        let line: &[u8] = b"a-b---";
        let trim_kind = Trim::Left;
        let regex = Regex::new("-+").unwrap();
        let result = trim_regex(line, &trim_kind, &regex);

        assert_eq!(result, b"a-b---");
    }

    #[cfg(feature = "regex")]
    #[test]
    fn test_trim_regex_left_no_match() {
        let line: &[u8] = b"abc";
        let trim_kind = Trim::Left;
        let regex = Regex::new("-+").unwrap();
        let result = trim_regex(line, &trim_kind, &regex);

        assert_eq!(result, b"abc");
    }

    #[cfg(feature = "regex")]
    #[test]
    fn test_trim_regex_right() {
        let line: &[u8] = b"---a-b---";
        let trim_kind = Trim::Right;
        let regex = Regex::new("-+").unwrap();
        let result = trim_regex(line, &trim_kind, &regex);

        assert_eq!(result, b"---a-b");
    }

    #[cfg(feature = "regex")]
    #[test]
    fn test_trim_regex_right_no_match() {
        let line: &[u8] = b"---a-b";
        let trim_kind = Trim::Right;
        let regex = Regex::new("-+").unwrap();
        let result = trim_regex(line, &trim_kind, &regex);

        assert_eq!(result, b"---a-b");
    }

    #[cfg(feature = "regex")]
    #[test]
    fn test_trim_regex_both() {
        let line: &[u8] = b"---a-b---";
        let trim_kind = Trim::Both;
        let regex = Regex::new("-+").unwrap();
        let result = trim_regex(line, &trim_kind, &regex);

        assert_eq!(result, b"a-b");
    }

    #[cfg(feature = "regex")]
    #[test]
    fn test_trim_regex_both_no_match() {
        let line: &[u8] = b"a-b";
        let trim_kind = Trim::Both;
        let regex = Regex::new("-+").unwrap();
        let result = trim_regex(line, &trim_kind, &regex);

        assert_eq!(result, b"a-b");
    }

    #[test]
    fn cut_str_it_cut_consecutive_delimiters() {
        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("1,3").unwrap();

        assert_eq!(cut(&opt, b"a-b-c"), b"ac\n".as_slice());
    }

    #[test]
    fn cut_str_it_compress_delimiters() {
        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("2").unwrap();

        let line = b"--a---b--";

        opt.compress_delimiter = false;
        assert_eq!(cut(&opt, line), b"\n".as_slice());

        opt.compress_delimiter = true;
        assert_eq!(cut(&opt, line), b"a\n".as_slice());

        opt.bounds = UserBoundsList::from_str("1:").unwrap();
        opt.compress_delimiter = true;
        assert_eq!(cut(&opt, line), b"-a-b-\n".as_slice());

        let line = b"a---b";
        opt.bounds = UserBoundsList::from_str("1:").unwrap();
        opt.compress_delimiter = true;
        assert_eq!(cut(&opt, line), b"a-b\n".as_slice());
    }

    #[cfg(feature = "regex")]
    #[test]
    fn cut_str_regex_it_compress_delimiters() {
        let mut opt = make_fields_opt();

        let line = b".,a,,,b..c";
        opt.bounds = UserBoundsList::from_str("2,3,4").unwrap();
        opt.compress_delimiter = true;
        opt.regex_bag = Some(make_regex_bag());
        opt.replace_delimiter = Some("-".into());

        assert_eq!(cut(&opt, line), b"abc\n".as_slice());

        let line = b".,a,,,b..c";
        opt.bounds = UserBoundsList::from_str("1:").unwrap();
        opt.compress_delimiter = true;
        opt.regex_bag = Some(make_regex_bag());
        opt.replace_delimiter = Some("-".into());

        assert_eq!(cut(&opt, line), b"-a-b-c\n".as_slice());
    }

    #[cfg(feature = "regex")]
    #[test]
    fn cut_str_it_cut_characters() {
        let mut opt = make_fields_opt();

        let line = "😁🤩😝😎".as_bytes();
        opt.bounds = UserBoundsList::from_str("2").unwrap();
        opt.bounds_type = BoundsType::Characters;
        opt.regex_bag = Some(make_cut_characters_regex_bag());

        assert_eq!(cut(&opt, line), "🤩\n".as_bytes());
    }

    #[cfg(feature = "regex")]
    #[test]
    fn cut_str_it_cut_characters_and_replace_the_delimiter() {
        let opt: Opt = "-c 1,2,3:4 -r - ".parse().unwrap();

        let line = "😁🤩😝😎".as_bytes();

        let output = cut(&opt, line);
        assert_eq!(&String::from_utf8_lossy(&output), "😁-🤩-😝-😎\n");
    }

    #[test]
    fn cut_str_it_supports_zero_terminated_lines() {
        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("2").unwrap();
        opt.eol = EOL::Zero;

        assert_eq!(cut(&opt, b"a-b-c"), b"b\0".as_slice());
    }

    #[test]
    fn cut_str_it_complement_ranges() {
        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("2").unwrap();
        opt.complement = true;

        assert_eq!(cut(&opt, b"a-b-c"), b"ac\n".as_slice());
    }

    #[test]
    fn cut_str_it_join_fields() {
        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("1,3").unwrap();
        opt.join = true;

        assert_eq!(cut(&opt, b"a-b-c"), b"a-c\n".as_slice());
    }

    #[test]
    fn cut_str_it_join_fields_with_a_custom_delimiter() {
        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("1,3").unwrap();
        opt.join = true;
        opt.replace_delimiter = Some("*".into());

        assert_eq!(cut(&opt, b"a-b-c"), b"a*c\n".as_slice());
    }

    #[test]
    fn cut_str_it_replace_delimiter() {
        let opt: Opt = "-d - -f 1:3 -r _".parse().unwrap();

        assert_eq!(cut(&opt, b"a-b-c"), b"a_b_c\n".as_slice());
    }

    #[cfg(feature = "regex")]
    #[test]
    fn cut_str_regex_it_replace_delimiter() {
        let opt: Opt = "-e [,] -f 1:3 -r _".parse().unwrap();

        assert_eq!(cut(&opt, b"a,b,c"), b"a_b_c\n".as_slice());
    }

    #[test]
    fn cut_str_it_compress_and_replace_delimiter() {
        let opt: Opt = "-d - -f 1:3 -r _ -p".parse().unwrap();

        assert_eq!(cut(&opt, b"a--b--c"), b"a_b_c\n".as_slice());
    }

    #[cfg(feature = "regex")]
    #[test]
    fn cut_str_regex_it_compress_and_replace_delimiter() {
        let opt: Opt = "-e [,] -f 1:3 -r _ -p".parse().unwrap();

        assert_eq!(cut(&opt, b"a,,b,,c"), b"a_b_c\n".as_slice());
    }

    #[cfg(feature = "regex")]
    #[test]
    fn cut_str_regex_it_join_fields_with_a_custom_delimiter() {
        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("1,3").unwrap();
        opt.delimiter = "[.,]".into();
        opt.regex_bag = Some(make_regex_bag());
        opt.join = true;
        opt.replace_delimiter = Some("<->".into());

        assert_eq!(cut(&opt, b"a.b,c"), b"a<->c\n".as_slice());
    }

    #[test]
    fn cut_str_it_format_fields() {
        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("{1} < {3} > {2}").unwrap();

        assert_eq!(cut(&opt, b"a-b-c"), b"a < c > b\n".as_slice());
    }

    #[test]
    fn cut_str_supports_greedy_delimiter() {
        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("2").unwrap();
        opt.greedy_delimiter = true;

        assert_eq!(cut(&opt, b"a---b---c"), b"b\n".as_slice());

        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("2:3").unwrap();
        opt.greedy_delimiter = true;

        assert_eq!(cut(&opt, b"a---b---c"), b"b---c\n".as_slice());
    }

    #[cfg(feature = "regex")]
    #[test]
    fn cut_str_regex_supports_greedy_delimiter() {
        let mut opt = make_fields_opt();
        opt.bounds = UserBoundsList::from_str("2:3").unwrap();

        opt.greedy_delimiter = true;
        opt.delimiter = "[.,]".into();
        opt.regex_bag = Some(make_regex_bag());

        assert_eq!(cut(&opt, b"a,,.,b..,,c"), b"b..,,c\n".as_slice());
    }

    #[test]
    fn cut_str_it_trim_fields() {
        let mut opt = make_fields_opt();
        let line = b"--a--b--c--";

        opt.trim = Some(Trim::Both);
        opt.bounds = UserBoundsList::from_str("1,3,-1").unwrap();
        assert_eq!(cut(&opt, line), b"abc\n".as_slice());

        opt.trim = Some(Trim::Left);
        opt.bounds = UserBoundsList::from_str("1,3,-3").unwrap();
        assert_eq!(cut(&opt, line), b"abc\n".as_slice());

        opt.trim = Some(Trim::Right);
        opt.bounds = UserBoundsList::from_str("3,5,-1").unwrap();
        assert_eq!(cut(&opt, line), b"abc\n".as_slice());
    }

    #[cfg(feature = "regex")]
    #[test]
    fn cut_str_regex_it_trim_fields() {
        let mut opt = make_fields_opt();
        let line = b"..a,.b..c,,";

        opt.delimiter = "[.,]".into();
        opt.regex_bag = Some(make_regex_bag());

        opt.trim = Some(Trim::Both);
        opt.bounds = UserBoundsList::from_str("1,3,-1").unwrap();
        assert_eq!(cut(&opt, line), b"abc\n".as_slice());

        opt.trim = Some(Trim::Left);
        opt.bounds = UserBoundsList::from_str("1,3,-3").unwrap();
        assert_eq!(cut(&opt, line), b"abc\n".as_slice());

        opt.trim = Some(Trim::Right);
        opt.bounds = UserBoundsList::from_str("3,5,-1").unwrap();
        assert_eq!(cut(&opt, line), b"abc\n".as_slice());
    }

    #[test]
    fn cut_str_it_produce_json_output() {
        let mut opt = make_fields_opt();
        opt.json = true;
        opt.replace_delimiter = Some(",".into());
        opt.bounds = UserBoundsList::from_str("1,3").unwrap();
        opt.join = true;

        assert_eq!(cut(&opt, b"a-b-c"), b"[\"a\",\"c\"]\n".as_slice());
    }

    #[test]
    fn cut_str_json_with_single_field_is_still_an_array() {
        let mut opt = make_fields_opt();
        opt.json = true;
        opt.replace_delimiter = Some(",".into());
        opt.bounds = UserBoundsList::from_str("1").unwrap();
        opt.join = true;

        assert_eq!(cut(&opt, b"a-b-c"), b"[\"a\"]\n".as_slice());
    }

    #[test]
    fn cut_str_complement_works_with_json() {
        let opt: Opt = "-d - -f 2,2:3,-1 -j --json --complement".parse().unwrap();

        assert_eq!(
            cut(&opt, b"a-b-c"),
            b"[\"a\",\"c\",\"a\",\"a\",\"b\"]\n".as_slice()
        );
    }

    #[cfg(feature = "regex")]
    #[test]
    fn cut_str_json_on_characters_works() {
        let opt: Opt = "-c 1,2,3:4 --json".parse().unwrap();

        let line = "😁🤩😝😎".as_bytes();

        let output = cut(&opt, line);
        assert_eq!(
            &String::from_utf8_lossy(&output),
            "[\"😁\",\"🤩\",\"😝\",\"😎\"]\n"
        );
    }

    #[test]
    fn test_cut_bytes_stream_cut_simplest_field_with_eol_and_fallbacks() {
        let mut opt = make_fields_opt();
        opt.fallback_oob = Some(b"generic fallback".to_vec());
        opt.bounds = UserBoundsList::from_str("{1}-fill-{2}-more fill-{3=last fill}").unwrap();

        let output = cut(&opt, b"a");

        assert_eq!(
            &String::from_utf8_lossy(&output),
            "a-fill-generic fallback-more fill-last fill\n"
        );
    }
}