1use clap::builder::ValueParser;
9use clap::{Arg, ArgAction, Command};
10use memchr::{Memchr3, memchr_iter, memmem::Finder};
11use std::cmp::Ordering;
12use std::ffi::OsString;
13use std::fs::File;
14use std::io::{BufRead, BufReader, BufWriter, Split, Stdin, Write, stdin, stdout};
15use std::num::IntErrorKind;
16#[cfg(unix)]
17use std::os::unix::ffi::OsStrExt;
18use thiserror::Error;
19use uucore::display::Quotable;
20use uucore::error::{FromIo, UError, UResult, USimpleError, set_exit_code};
21use uucore::format_usage;
22use uucore::i18n::collator::{
23 AlternateHandling, CollatorOptions, locale_cmp, should_use_locale_collation, try_init_collator,
24};
25use uucore::line_ending::LineEnding;
26use uucore::translate;
27
/// Errors that can occur while reading and joining the input files.
#[derive(Debug, Error)]
enum JoinError {
    /// An I/O failure. Any `std::io::Error` converts into this variant through `#[from]`,
    /// and its display text is produced from the `join-error-io` translation.
    #[error("{}", translate!("join-error-io", "error" => .0))]
    IOError(#[from] std::io::Error),

    /// An input line was out of order. The payload is the complete, already formatted
    /// message and is displayed verbatim.
    #[error("{0}")]
    UnorderedInput(String),
}

impl UError for JoinError {
    // Every `JoinError` maps to exit status 1.
    fn code(&self) -> i32 {
        1
    }
}
43
/// Identifies which of the two input files a state, option value or output field refers to.
#[derive(Copy, Clone, PartialEq)]
enum FileNum {
    /// The first file operand.
    File1,
    /// The second file operand.
    File2,
}
49
/// The field separator selected on the command line, before it is turned into a
/// concrete `Separator` implementation.
#[derive(Clone)]
enum SepSetting {
    /// Split on one specific byte.
    Byte(u8),
    /// Split on the byte sequence of a single character.
    Char(Vec<u8>),
    /// Do not split: the whole line is one field.
    Line,
    /// Split on runs of blanks; this is the default when no separator is given.
    Whitespaces,
}
61
/// Strategy for splitting an input line into fields and for separating fields on output.
trait Separator: Clone {
    /// Returns the `(start, end)` byte ranges of the fields found in `haystack`;
    /// `end` is exclusive.
    ///
    /// `len_guess` is the expected number of fields. The implementations in this file
    /// use it only to pre-size the returned vector.
    fn field_ranges(&self, haystack: &[u8], len_guess: usize) -> Vec<(usize, usize)>;
    /// Returns the bytes written between two fields of an output line.
    fn output_separator(&self) -> &[u8];
}
68
69#[derive(Copy, Clone)]
71struct OneByteSep {
72 byte: [u8; 1],
73}
74
75impl Separator for OneByteSep {
76 fn field_ranges(&self, haystack: &[u8], len_guess: usize) -> Vec<(usize, usize)> {
77 let mut field_ranges = Vec::with_capacity(len_guess);
78 let mut last_end = 0;
79
80 for i in memchr_iter(self.byte[0], haystack) {
81 field_ranges.push((last_end, i));
82 last_end = i + 1;
83 }
84 field_ranges.push((last_end, haystack.len()));
85 field_ranges
86 }
87
88 fn output_separator(&self) -> &[u8] {
89 &self.byte
90 }
91}
92
93#[derive(Clone)]
95struct MultiByteSep<'a> {
96 finder: Finder<'a>,
97}
98
99impl Separator for MultiByteSep<'_> {
100 fn field_ranges(&self, haystack: &[u8], len_guess: usize) -> Vec<(usize, usize)> {
101 let mut field_ranges = Vec::with_capacity(len_guess);
102 let mut last_end = 0;
103
104 for i in self.finder.find_iter(haystack) {
105 field_ranges.push((last_end, i));
106 last_end = i + self.finder.needle().len();
107 }
108 field_ranges.push((last_end, haystack.len()));
109 field_ranges
110 }
111
112 fn output_separator(&self) -> &[u8] {
113 self.finder.needle()
114 }
115}
116
117#[derive(Copy, Clone)]
119struct LineSep {}
120
121impl Separator for LineSep {
122 fn field_ranges(&self, haystack: &[u8], _len_guess: usize) -> Vec<(usize, usize)> {
123 vec![(0, haystack.len())]
124 }
125
126 fn output_separator(&self) -> &[u8] {
127 &[]
128 }
129}
130
131#[derive(Copy, Clone)]
133struct WhitespaceSep {}
134
135impl Separator for WhitespaceSep {
136 fn field_ranges(&self, haystack: &[u8], len_guess: usize) -> Vec<(usize, usize)> {
137 let mut field_ranges = Vec::with_capacity(len_guess);
138 let mut last_end = 0;
139
140 for i in Memchr3::new(b' ', b'\t', b'\n', haystack) {
143 if i > last_end {
145 field_ranges.push((last_end, i));
146 }
147 last_end = i + 1;
148 }
149 field_ranges.push((last_end, haystack.len()));
150 field_ranges
151 }
152
153 fn output_separator(&self) -> &[u8] {
154 b" "
155 }
156}
157
/// How strictly the sort order of the input is verified while reading.
#[derive(Copy, Clone, PartialEq)]
enum CheckOrder {
    /// Neither `--check-order` nor `--nocheck-order` was given: an out-of-order line is
    /// only reported (as a non-fatal message) once unpairable lines have been seen.
    Default,
    /// `--nocheck-order`: lines are never compared against the previous key.
    Disabled,
    /// `--check-order`: the first out-of-order line is a fatal error.
    Enabled,
}
164
165struct Settings {
166 key1: usize,
167 key2: usize,
168 print_unpaired1: bool,
169 print_unpaired2: bool,
170 print_joined: bool,
171 ignore_case: bool,
172 line_ending: LineEnding,
173 separator: SepSetting,
174 autoformat: bool,
175 format: Vec<Spec>,
176 empty: Vec<u8>,
177 check_order: CheckOrder,
178 headers: bool,
179}
180
181impl Default for Settings {
182 fn default() -> Self {
183 Self {
184 key1: 0,
185 key2: 0,
186 print_unpaired1: false,
187 print_unpaired2: false,
188 print_joined: true,
189 ignore_case: false,
190 line_ending: LineEnding::Newline,
191 separator: SepSetting::Whitespaces,
192 autoformat: false,
193 format: vec![],
194 empty: vec![],
195 check_order: CheckOrder::Default,
196 headers: false,
197 }
198 }
199}
200
201struct Repr<'a, Sep: Separator> {
203 line_ending: LineEnding,
204 separator: Sep,
205 format: Vec<Spec>,
206 empty: &'a [u8],
207}
208
209impl<'a, Sep: Separator> Repr<'a, Sep> {
210 fn new(line_ending: LineEnding, separator: Sep, format: Vec<Spec>, empty: &'a [u8]) -> Self {
211 Repr {
212 line_ending,
213 separator,
214 format,
215 empty,
216 }
217 }
218
219 fn uses_format(&self) -> bool {
220 !self.format.is_empty()
221 }
222
223 fn write_field(
225 &self,
226 writer: &mut impl Write,
227 field: Option<&[u8]>,
228 ) -> Result<(), std::io::Error> {
229 let value = match field {
230 Some(field) => field,
231 None => self.empty,
232 };
233
234 writer.write_all(value)
235 }
236
237 fn write_fields(
239 &self,
240 writer: &mut impl Write,
241 line: &Line,
242 index: usize,
243 ) -> Result<(), std::io::Error> {
244 for i in 0..line.field_ranges.len() {
245 if i != index {
246 writer.write_all(self.separator.output_separator())?;
247 writer.write_all(line.get_field(i).unwrap())?;
248 }
249 }
250 Ok(())
251 }
252
253 fn write_format<F>(&self, writer: &mut impl Write, f: F) -> Result<(), std::io::Error>
255 where
256 F: Fn(&Spec) -> Option<&'a [u8]>,
257 {
258 for i in 0..self.format.len() {
259 if i > 0 {
260 writer.write_all(self.separator.output_separator())?;
261 }
262
263 let field = match f(&self.format[i]) {
264 Some(value) => value,
265 None => self.empty,
266 };
267
268 writer.write_all(field)?;
269 }
270 Ok(())
271 }
272
273 fn write_line_ending(&self, writer: &mut impl Write) -> Result<(), std::io::Error> {
274 writer.write_all(&[self.line_ending as u8])
275 }
276}
277
/// Byte slice whose ordering and equality ignore ASCII case.
#[derive(Eq)]
struct CaseInsensitiveSlice<'a> {
    v: &'a [u8],
}

impl Ord for CaseInsensitiveSlice<'_> {
    /// Lexicographic comparison of the ASCII-lowercased bytes: the first differing
    /// pair decides, and when one slice is a prefix of the other the shorter one
    /// sorts first.
    fn cmp(&self, other: &Self) -> Ordering {
        let lhs = self.v.iter().map(u8::to_ascii_lowercase);
        let rhs = other.v.iter().map(u8::to_ascii_lowercase);
        lhs.cmp(rhs)
    }
}

impl PartialOrd for CaseInsensitiveSlice<'_> {
    /// Always `Some`: the ordering is total.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl PartialEq for CaseInsensitiveSlice<'_> {
    /// Two slices are equal when they have the same length and match byte for byte
    /// without regard to ASCII case.
    fn eq(&self, other: &Self) -> bool {
        <[u8]>::eq_ignore_ascii_case(self.v, other.v)
    }
}
311
312struct Input<Sep: Separator> {
314 separator: Sep,
315 ignore_case: bool,
316 check_order: CheckOrder,
317 use_locale: bool,
318}
319
320impl<Sep: Separator> Input<Sep> {
321 fn new(separator: Sep, ignore_case: bool, check_order: CheckOrder, use_locale: bool) -> Self {
322 Self {
323 separator,
324 ignore_case,
325 check_order,
326 use_locale,
327 }
328 }
329
330 fn compare(&self, field1: Option<&[u8]>, field2: Option<&[u8]>) -> Ordering {
331 if let (Some(field1), Some(field2)) = (field1, field2) {
332 if self.ignore_case {
333 let field1 = CaseInsensitiveSlice { v: field1 };
334 let field2 = CaseInsensitiveSlice { v: field2 };
335 field1.cmp(&field2)
336 } else if self.use_locale {
337 locale_cmp(field1, field2)
338 } else {
339 field1.cmp(field2)
340 }
341 } else {
342 match field1 {
343 Some(_) => Ordering::Greater,
344 None => match field2 {
345 Some(_) => Ordering::Less,
346 None => Ordering::Equal,
347 },
348 }
349 }
350 }
351}
352
353enum Spec {
354 Key,
355 Field(FileNum, usize),
356}
357
358impl Spec {
359 fn parse(format: &str) -> UResult<Self> {
360 let mut chars = format.chars();
361
362 let file_num = match chars.next() {
363 Some('0') => {
364 if chars.next().is_none() {
366 return Ok(Self::Key);
367 }
368 return Err(USimpleError::new(
369 1,
370 translate!("join-error-invalid-field-specifier", "spec" => format.quote()),
371 ));
372 }
373 Some('1') => FileNum::File1,
374 Some('2') => FileNum::File2,
375 _ => {
376 return Err(USimpleError::new(
377 1,
378 translate!("join-error-invalid-file-number", "spec" => format.quote()),
379 ));
380 }
381 };
382
383 if let Some('.') = chars.next() {
384 return Ok(Self::Field(file_num, parse_field_number(chars.as_str())?));
385 }
386
387 Err(USimpleError::new(
388 1,
389 translate!("join-error-invalid-field-specifier", "spec" => format.quote()),
390 ))
391 }
392}
393
394struct Line {
395 field_ranges: Vec<(usize, usize)>,
396 string: Vec<u8>,
397}
398
399impl Line {
400 fn new<Sep: Separator>(string: Vec<u8>, separator: &Sep, len_guess: usize) -> Self {
401 let field_ranges = separator.field_ranges(&string, len_guess);
402
403 Self {
404 field_ranges,
405 string,
406 }
407 }
408
409 fn get_field(&self, index: usize) -> Option<&[u8]> {
411 if index < self.field_ranges.len() {
412 let (low, high) = self.field_ranges[index];
413 Some(&self.string[low..high])
414 } else {
415 None
416 }
417 }
418}
419
420struct State<'a> {
421 key: usize,
422 file_name: &'a OsString,
423 file_num: FileNum,
424 print_unpaired: bool,
425 lines: Split<Box<dyn BufRead + 'a>>,
426 max_len: usize,
427 seq: Vec<Line>,
428 line_num: usize,
429 has_failed: bool,
430 has_unpaired: bool,
431}
432
433impl<'a> State<'a> {
434 fn new(
435 file_num: FileNum,
436 name: &'a OsString,
437 stdin: &'a Stdin,
438 key: usize,
439 line_ending: LineEnding,
440 print_unpaired: bool,
441 ) -> UResult<Self> {
442 let file_buf = if name == "-" {
443 Box::new(stdin.lock()) as Box<dyn BufRead>
444 } else {
445 let file = File::open(name).map_err_context(|| format!("{}", name.maybe_quote()))?;
446 Box::new(BufReader::new(file)) as Box<dyn BufRead>
447 };
448
449 Ok(State {
450 key,
451 file_name: name,
452 file_num,
453 print_unpaired,
454 lines: file_buf.split(line_ending as u8),
455 max_len: 1,
456 seq: Vec::new(),
457 line_num: 0,
458 has_failed: false,
459 has_unpaired: false,
460 })
461 }
462
463 fn skip_line<Sep: Separator>(
465 &mut self,
466 writer: &mut impl Write,
467 input: &Input<Sep>,
468 repr: &Repr<'a, Sep>,
469 ) -> UResult<()> {
470 if self.print_unpaired {
471 self.write_first_line(writer, repr)?;
472 }
473
474 self.reset_next_line(input)?;
475 Ok(())
476 }
477
478 fn extend<Sep: Separator>(&mut self, input: &Input<Sep>) -> UResult<Option<Line>> {
481 while let Some(line) = self.next_line(input)? {
482 let diff = input.compare(self.get_current_key(), line.get_field(self.key));
483
484 if diff == Ordering::Equal {
485 self.seq.push(line);
486 } else {
487 return Ok(Some(line));
488 }
489 }
490
491 Ok(None)
492 }
493
494 fn write_headers<Sep: Separator>(
496 &self,
497 writer: &mut impl Write,
498 other: &State,
499 repr: &Repr<'a, Sep>,
500 ) -> Result<(), std::io::Error> {
501 if self.has_line() {
502 if other.has_line() {
503 self.combine(writer, other, repr)?;
504 } else {
505 self.write_first_line(writer, repr)?;
506 }
507 } else if other.has_line() {
508 other.write_first_line(writer, repr)?;
509 }
510
511 Ok(())
512 }
513
514 fn combine<Sep: Separator>(
516 &self,
517 writer: &mut impl Write,
518 other: &State,
519 repr: &Repr<'a, Sep>,
520 ) -> Result<(), std::io::Error> {
521 let key = self.get_current_key();
522
523 for line1 in &self.seq {
524 for line2 in &other.seq {
525 if repr.uses_format() {
526 repr.write_format(writer, |spec| match *spec {
527 Spec::Key => key,
528 Spec::Field(file_num, field_num) => {
529 if file_num == self.file_num {
530 return line1.get_field(field_num);
531 }
532
533 if file_num == other.file_num {
534 return line2.get_field(field_num);
535 }
536
537 None
538 }
539 })?;
540 } else {
541 repr.write_field(writer, key)?;
542 repr.write_fields(writer, line1, self.key)?;
543 repr.write_fields(writer, line2, other.key)?;
544 }
545
546 repr.write_line_ending(writer)?;
547 }
548 }
549
550 Ok(())
551 }
552
553 fn reset(&mut self, next_line: Option<Line>) {
555 self.seq.clear();
556
557 if let Some(line) = next_line {
558 self.seq.push(line);
559 }
560 }
561
562 fn reset_read_line<Sep: Separator>(
563 &mut self,
564 input: &Input<Sep>,
565 ) -> Result<(), std::io::Error> {
566 let line = self.read_line(&input.separator)?;
567 self.reset(line);
568 Ok(())
569 }
570
571 fn reset_next_line<Sep: Separator>(&mut self, input: &Input<Sep>) -> Result<(), JoinError> {
572 let line = self.next_line(input)?;
573 self.reset(line);
574 Ok(())
575 }
576
577 fn has_line(&self) -> bool {
578 !self.seq.is_empty()
579 }
580
581 fn initialize<Sep: Separator>(
582 &mut self,
583 read_sep: &Sep,
584 autoformat: bool,
585 ) -> std::io::Result<usize> {
586 if let Some(line) = self.read_line(read_sep)? {
587 self.seq.push(line);
588
589 if autoformat {
590 return Ok(self.seq[0].field_ranges.len());
591 }
592 }
593 Ok(0)
594 }
595
596 fn finalize<Sep: Separator>(
597 &mut self,
598 writer: &mut impl Write,
599 input: &Input<Sep>,
600 repr: &Repr<'a, Sep>,
601 ) -> UResult<()> {
602 if self.has_line() {
603 if self.print_unpaired {
604 self.write_first_line(writer, repr)?;
605 }
606
607 let mut next_line = self.next_line(input)?;
608 while let Some(line) = &next_line {
609 if self.print_unpaired {
610 self.write_line(writer, line, repr)?;
611 }
612 self.reset(next_line);
613 next_line = self.next_line(input)?;
614 }
615 }
616
617 Ok(())
618 }
619
620 fn read_line<Sep: Separator>(&mut self, sep: &Sep) -> Result<Option<Line>, std::io::Error> {
622 match self.lines.next() {
623 Some(value) => {
624 self.line_num += 1;
625 let line = Line::new(value?, sep, self.max_len);
626 if line.field_ranges.len() > self.max_len {
627 self.max_len = line.field_ranges.len();
628 }
629 Ok(Some(line))
630 }
631 None => Ok(None),
632 }
633 }
634
635 fn next_line<Sep: Separator>(&mut self, input: &Input<Sep>) -> Result<Option<Line>, JoinError> {
637 if let Some(line) = self.read_line(&input.separator)? {
638 if input.check_order == CheckOrder::Disabled {
639 return Ok(Some(line));
640 }
641
642 let diff = input.compare(self.get_current_key(), line.get_field(self.key));
643
644 if diff == Ordering::Greater
645 && (input.check_order == CheckOrder::Enabled
646 || (self.has_unpaired && !self.has_failed))
647 {
648 let err_msg = translate!("join-error-not-sorted", "file" => self.file_name.maybe_quote(), "line_num" => self.line_num, "content" => String::from_utf8_lossy(&line.string));
649 if input.check_order == CheckOrder::Enabled {
651 return Err(JoinError::UnorderedInput(err_msg));
652 }
653 eprintln!("{}: {err_msg}", uucore::execution_phrase());
654 self.has_failed = true;
655 }
656
657 Ok(Some(line))
658 } else {
659 Ok(None)
660 }
661 }
662
663 fn get_current_key(&self) -> Option<&[u8]> {
665 self.seq[0].get_field(self.key)
666 }
667
668 fn write_line<Sep: Separator>(
669 &self,
670 writer: &mut impl Write,
671 line: &Line,
672 repr: &Repr<'a, Sep>,
673 ) -> Result<(), std::io::Error> {
674 if repr.uses_format() {
675 repr.write_format(writer, |spec| match *spec {
676 Spec::Key => line.get_field(self.key),
677 Spec::Field(file_num, field_num) => {
678 if file_num == self.file_num {
679 line.get_field(field_num)
680 } else {
681 None
682 }
683 }
684 })?;
685 } else {
686 repr.write_field(writer, line.get_field(self.key))?;
687 repr.write_fields(writer, line, self.key)?;
688 }
689
690 repr.write_line_ending(writer)
691 }
692
693 fn write_first_line<Sep: Separator>(
694 &self,
695 writer: &mut impl Write,
696 repr: &Repr<'a, Sep>,
697 ) -> Result<(), std::io::Error> {
698 self.write_line(writer, &self.seq[0], repr)
699 }
700}
701
702fn parse_separator(value_os: &OsString) -> UResult<SepSetting> {
703 if value_os.is_empty() {
711 return Ok(SepSetting::Line);
712 }
713
714 #[cfg(unix)]
715 {
716 let value = value_os.as_bytes();
717 if value.len() == 1 {
718 return Ok(SepSetting::Byte(value[0]));
719 }
720 }
721
722 let Some(value) = value_os.to_str() else {
723 #[cfg(unix)]
724 return Err(USimpleError::new(1, translate!("join-error-non-utf8-tab")));
725 #[cfg(not(unix))]
726 return Err(USimpleError::new(
727 1,
728 translate!("join-error-unprintable-separators"),
729 ));
730 };
731
732 let mut chars = value.chars();
733 let c = chars.next().expect("valid string with at least one byte");
734 match chars.next() {
735 None => Ok(SepSetting::Char(value.into())),
736 Some('0') if c == '\\' => Ok(SepSetting::Byte(0)),
737 _ => Err(USimpleError::new(
738 1,
739 translate!("join-error-multi-character-tab", "value" => value),
740 )),
741 }
742}
743
744fn parse_print_settings(matches: &clap::ArgMatches) -> UResult<(bool, bool, bool)> {
745 let mut print_joined = true;
746 let mut print_unpaired1 = false;
747 let mut print_unpaired2 = false;
748
749 let v_values = matches.get_many::<String>("v");
750 if v_values.is_some() {
751 print_joined = false;
752 }
753
754 let unpaired = v_values
755 .unwrap_or_default()
756 .chain(matches.get_many("a").unwrap_or_default());
757 for file_num in unpaired {
758 match parse_file_number(file_num)? {
759 FileNum::File1 => print_unpaired1 = true,
760 FileNum::File2 => print_unpaired2 = true,
761 }
762 }
763
764 Ok((print_joined, print_unpaired1, print_unpaired2))
765}
766
767fn get_and_parse_field_number(matches: &clap::ArgMatches, key: &str) -> UResult<Option<usize>> {
768 let value = matches.get_one::<String>(key).map(|s| s.as_str());
769 parse_field_number_option(value)
770}
771
772#[allow(clippy::field_reassign_with_default)]
777fn parse_settings(matches: &clap::ArgMatches) -> UResult<Settings> {
778 let keys = get_and_parse_field_number(matches, "j")?;
779 let key1 = get_and_parse_field_number(matches, "1")?;
780 let key2 = get_and_parse_field_number(matches, "2")?;
781
782 let (print_joined, print_unpaired1, print_unpaired2) = parse_print_settings(matches)?;
783
784 let mut settings = Settings::default();
785
786 settings.print_joined = print_joined;
787 settings.print_unpaired1 = print_unpaired1;
788 settings.print_unpaired2 = print_unpaired2;
789
790 settings.ignore_case = matches.get_flag("i");
791 settings.key1 = get_field_number(keys, key1)?;
792 settings.key2 = get_field_number(keys, key2)?;
793 if let Some(value_os) = matches.get_one::<OsString>("t") {
794 settings.separator = parse_separator(value_os)?;
795 }
796 if let Some(format) = matches.get_one::<String>("o") {
797 if format == "auto" {
798 settings.autoformat = true;
799 } else {
800 let mut specs = vec![];
801 for part in format.split([' ', ',', '\t']) {
802 specs.push(Spec::parse(part)?);
803 }
804 settings.format = specs;
805 }
806 }
807
808 if let Some(empty) = matches.get_one::<String>("e") {
809 settings.empty = empty.as_bytes().to_vec();
810 }
811
812 if matches.get_flag("nocheck-order") {
813 settings.check_order = CheckOrder::Disabled;
814 }
815
816 if matches.get_flag("check-order") {
817 settings.check_order = CheckOrder::Enabled;
818 }
819
820 if matches.get_flag("header") {
821 settings.headers = true;
822 }
823
824 settings.line_ending = LineEnding::from_zero_flag(matches.get_flag("z"));
825
826 Ok(settings)
827}
828
829#[uucore::main]
830pub fn uumain(args: impl uucore::Args) -> UResult<()> {
831 let matches = uucore::clap_localization::handle_clap_result(uu_app(), args)?;
832
833 let mut opts = CollatorOptions::default();
834 opts.alternate_handling = Some(AlternateHandling::Shifted);
835 let _ = try_init_collator(opts);
836
837 let settings = parse_settings(&matches)?;
838
839 let file1 = matches.get_one::<OsString>("file1").unwrap();
840 let file2 = matches.get_one::<OsString>("file2").unwrap();
841
842 if file1 == "-" && file2 == "-" {
843 return Err(USimpleError::new(
844 1,
845 translate!("join-error-both-files-stdin"),
846 ));
847 }
848
849 let sep = settings.separator.clone();
850 match sep {
851 SepSetting::Byte(byte) => exec(file1, file2, settings, OneByteSep { byte: [byte] }),
852 SepSetting::Char(c) => exec(
853 file1,
854 file2,
855 settings,
856 MultiByteSep {
857 finder: Finder::new(&c),
858 },
859 ),
860 SepSetting::Whitespaces => exec(file1, file2, settings, WhitespaceSep {}),
861 SepSetting::Line => exec(file1, file2, settings, LineSep {}),
862 }
863}
864
865pub fn uu_app() -> Command {
866 Command::new(uucore::util_name())
867 .version(uucore::crate_version!())
868 .help_template(uucore::localized_help_template(uucore::util_name()))
869 .about(translate!("join-about"))
870 .override_usage(format_usage(&translate!("join-usage")))
871 .infer_long_args(true)
872 .arg(
873 Arg::new("a")
874 .short('a')
875 .action(ArgAction::Append)
876 .num_args(1)
877 .value_parser(["1", "2"])
878 .value_name("FILENUM")
879 .help(translate!("join-help-a")),
880 )
881 .arg(
882 Arg::new("v")
883 .short('v')
884 .action(ArgAction::Append)
885 .num_args(1)
886 .value_parser(["1", "2"])
887 .value_name("FILENUM")
888 .help(translate!("join-help-v")),
889 )
890 .arg(
891 Arg::new("e")
892 .short('e')
893 .value_name("EMPTY")
894 .help(translate!("join-help-e")),
895 )
896 .arg(
897 Arg::new("i")
898 .short('i')
899 .long("ignore-case")
900 .help(translate!("join-help-i"))
901 .action(ArgAction::SetTrue),
902 )
903 .arg(
904 Arg::new("j")
905 .short('j')
906 .value_name("FIELD")
907 .help(translate!("join-help-j")),
908 )
909 .arg(
910 Arg::new("o")
911 .short('o')
912 .value_name("FORMAT")
913 .help(translate!("join-help-o")),
914 )
915 .arg(
916 Arg::new("t")
917 .short('t')
918 .value_name("CHAR")
919 .value_parser(ValueParser::os_string())
920 .help(translate!("join-help-t")),
921 )
922 .arg(
923 Arg::new("1")
924 .short('1')
925 .value_name("FIELD")
926 .help(translate!("join-help-1")),
927 )
928 .arg(
929 Arg::new("2")
930 .short('2')
931 .value_name("FIELD")
932 .help(translate!("join-help-2")),
933 )
934 .arg(
935 Arg::new("check-order")
936 .long("check-order")
937 .help(translate!("join-help-check-order"))
938 .action(ArgAction::SetTrue),
939 )
940 .arg(
941 Arg::new("nocheck-order")
942 .long("nocheck-order")
943 .help(translate!("join-help-nocheck-order"))
944 .action(ArgAction::SetTrue),
945 )
946 .arg(
947 Arg::new("header")
948 .long("header")
949 .help(translate!("join-help-header"))
950 .action(ArgAction::SetTrue),
951 )
952 .arg(
953 Arg::new("z")
954 .short('z')
955 .long("zero-terminated")
956 .help(translate!("join-help-z"))
957 .action(ArgAction::SetTrue),
958 )
959 .arg(
960 Arg::new("file1")
961 .required(true)
962 .value_name("FILE1")
963 .value_hint(clap::ValueHint::FilePath)
964 .value_parser(clap::value_parser!(OsString))
965 .hide(true),
966 )
967 .arg(
968 Arg::new("file2")
969 .required(true)
970 .value_name("FILE2")
971 .value_hint(clap::ValueHint::FilePath)
972 .value_parser(clap::value_parser!(OsString))
973 .hide(true),
974 )
975}
976
977fn exec<Sep: Separator>(
978 file1: &OsString,
979 file2: &OsString,
980 settings: Settings,
981 sep: Sep,
982) -> UResult<()> {
983 let stdin = stdin();
984
985 let mut state1 = State::new(
986 FileNum::File1,
987 file1,
988 &stdin,
989 settings.key1,
990 settings.line_ending,
991 settings.print_unpaired1,
992 )?;
993
994 let mut state2 = State::new(
995 FileNum::File2,
996 file2,
997 &stdin,
998 settings.key2,
999 settings.line_ending,
1000 settings.print_unpaired2,
1001 )?;
1002
1003 let input = Input::new(
1004 sep.clone(),
1005 settings.ignore_case,
1006 settings.check_order,
1007 should_use_locale_collation(),
1008 );
1009
1010 let format = if settings.autoformat {
1011 let mut format = vec![Spec::Key];
1012 let mut initialize = |state: &mut State| -> UResult<()> {
1013 let max_fields = state.initialize(&sep, settings.autoformat)?;
1014 for i in 0..max_fields {
1015 if i != state.key {
1016 format.push(Spec::Field(state.file_num, i));
1017 }
1018 }
1019 Ok(())
1020 };
1021 initialize(&mut state1)?;
1022 initialize(&mut state2)?;
1023 format
1024 } else {
1025 state1.initialize(&sep, settings.autoformat)?;
1026 state2.initialize(&sep, settings.autoformat)?;
1027 settings.format
1028 };
1029
1030 let repr = Repr::new(settings.line_ending, sep, format, &settings.empty);
1031
1032 let stdout = stdout();
1033 let mut writer = BufWriter::new(stdout.lock());
1034
1035 if settings.headers {
1036 state1.write_headers(&mut writer, &state2, &repr)?;
1037 state1.reset_read_line(&input)?;
1038 state2.reset_read_line(&input)?;
1039 }
1040
1041 while state1.has_line() && state2.has_line() {
1042 let diff = input.compare(state1.get_current_key(), state2.get_current_key());
1043
1044 match diff {
1045 Ordering::Less => {
1046 if let Err(e) = state1.skip_line(&mut writer, &input, &repr) {
1047 writer.flush()?;
1048 return Err(e);
1049 }
1050 state1.has_unpaired = true;
1051 state2.has_unpaired = true;
1052 }
1053 Ordering::Greater => {
1054 if let Err(e) = state2.skip_line(&mut writer, &input, &repr) {
1055 writer.flush()?;
1056 return Err(e);
1057 }
1058 state1.has_unpaired = true;
1059 state2.has_unpaired = true;
1060 }
1061 Ordering::Equal => {
1062 let next_line1 = match state1.extend(&input) {
1063 Ok(line) => line,
1064 Err(e) => {
1065 writer.flush()?;
1066 return Err(e);
1067 }
1068 };
1069 let next_line2 = match state2.extend(&input) {
1070 Ok(line) => line,
1071 Err(e) => {
1072 writer.flush()?;
1073 return Err(e);
1074 }
1075 };
1076
1077 if settings.print_joined {
1078 state1.combine(&mut writer, &state2, &repr)?;
1079 }
1080
1081 state1.reset(next_line1);
1082 state2.reset(next_line2);
1083 }
1084 }
1085 }
1086
1087 if let Err(e) = state1.finalize(&mut writer, &input, &repr) {
1088 writer.flush()?;
1089 return Err(e);
1090 }
1091 if let Err(e) = state2.finalize(&mut writer, &input, &repr) {
1092 writer.flush()?;
1093 return Err(e);
1094 }
1095
1096 writer.flush()?;
1097
1098 if state1.has_failed || state2.has_failed {
1099 eprintln!(
1100 "{}: {}",
1101 uucore::execution_phrase(),
1102 translate!("join-error-input-not-sorted")
1103 );
1104 set_exit_code(1);
1105 }
1106 Ok(())
1107}
1108
1109fn get_field_number(keys: Option<usize>, key: Option<usize>) -> UResult<usize> {
1112 if let Some(keys) = keys {
1113 if let Some(key) = key {
1114 if keys != key {
1115 return Err(USimpleError::new(
1117 1,
1118 translate!("join-error-incompatible-fields", "field1" => (keys + 1), "field2" => (key + 1)),
1119 ));
1120 }
1121 }
1122
1123 return Ok(keys);
1124 }
1125
1126 Ok(key.unwrap_or(0))
1127}
1128
1129fn parse_field_number(value: &str) -> UResult<usize> {
1132 match value.parse::<usize>() {
1133 Ok(result) if result > 0 => Ok(result - 1),
1134 Err(e) if e.kind() == &IntErrorKind::PosOverflow => Ok(usize::MAX),
1135 _ => Err(USimpleError::new(
1136 1,
1137 translate!("join-error-invalid-field-number", "value" => value.quote()),
1138 )),
1139 }
1140}
1141
1142fn parse_file_number(value: &str) -> UResult<FileNum> {
1143 match value {
1144 "1" => Ok(FileNum::File1),
1145 "2" => Ok(FileNum::File2),
1146 value => Err(USimpleError::new(
1147 1,
1148 translate!("join-error-invalid-file-number-simple", "value" => value.quote()),
1149 )),
1150 }
1151}
1152
1153fn parse_field_number_option(value: Option<&str>) -> UResult<Option<usize>> {
1154 match value {
1155 None => Ok(None),
1156 Some(val) => Ok(Some(parse_field_number(val)?)),
1157 }
1158}