argyle/
macros.rs

1/*!
2# Argyle: Macros.
3*/
4
5
6
7#[macro_export(local_inner_macros)]
8/// # Generate a CLI Argument Enum and Parser/Iterator.
9///
10/// This macro generates a custom enum and iterator to help with CLI argument
11/// parsing.
12///
13/// `argue!` is intended for use cases requiring more than the standard library's
14/// barebones [`args_os`](::std::env::args_os) helper, but less than the
15/// full-service offerings (and overhead) of a crate like [clap](https://crates.io/crates/clap).
16///
17/// It'll automatically convert UTF-8 arguments to `String`s (without
18/// panicking), untangle combined key/value pair representations like `-kval`
19/// or `--key=val`, and stop if/when it encounters an end-of-command terminator
20/// (`"--"`).
21///
22/// The subsequent validation and handling, however, are left _entirely up to
23/// you_. Loop, match, and proceed however you see fit!
24///
25/// ## Example
26///
27/// ```
28/// use argyle::argue;
29///
30/// // Construct the enum and iterator.
31/// argue! {
32///     // By default, this macro will call the enum "Argument" and the
33///     // iterator "ArgumentIter". If you'd rather they be called something
34///     // else, or have a non-private scope, you can override the defaults
35///     // by kicking things off with the following.
36///
37///     /// # My Arguments Enum.
38///     ///
39///     /// If you supply documentation like this
40///     #[doc = "and/or like this"]
41///     /// it'll be attached to the generated object.
42///     pub             // You can optionally change the scope like so.
43///     MyArgument,     // A name and trailing comma are required.
44///
45///     MyArgumentIter, // Naked works too if you don't care about docs/scope,
46///                     // though clippy may scold you. ;)
47///
48///     // --------------------
49///
50///     // If you have valueless keywords, they come next as a comma-separated
51///     // list.
52///     //
53///     // Each entry needs an ident for the variant name and one or more
54///     // string literals to match against.
55///     Help    "-h" "--help",
56///     Version "-V" "--version",
57///     Stderr       "--stderr",
58///
59///     // --------------------
60///
61///     // If you have option keywords, those come next, but require an
62///     // "@options" marker to announce their presence.
63///     @options
64///
65///     // The list format is otherwise identical to their valueless
66///     // counterparts.
67///     Format       "--format",
68///     Level   "-l" "--level",
69///
70///     // --------------------
71///
72///     // If you'd like to differentiate unmatched _paths_ from arbitrary
73///     // string values, you can declare a variant for the purpose like so.
74///     @catchall-paths Path,
75///
76///     // --------------------
77///
78///     // Last but not least, the enum will need two catchall variants to
79///     // handle unmatched String and OsString values.
80///     //
81///     // By default, these are auto-generated as "Other" and "OtherOs",
82///     // but if you'd like to call them something else, now's the time!
83///     @catchall Invalid InvalidUtf8,
84/// }
85///
86/// /// # Main.
87/// fn main() {
88/// # use std::path::PathBuf;
89///     // Example settings.
90///     let mut stderr = false;
91///     let mut format: Option<Format> = None;
92///     let mut level = 0_u8;
93///     let mut paths: Vec<PathBuf> = Vec::new();
94///
95///     // Loop through the environmental arguments, taking whatever actions
96///     // make sense for your application.
97///     for arg in MyArgument::args_os() {
98///         match arg {
99///             // You named these!
100///             MyArgument::Help => print_help(),
101///             MyArgument::Version => print_version(),
102///             MyArgument::Stderr => { stderr = true; },
103///
104///             // Options come with the value as a String.
105///             MyArgument::Format(v) => {
106///                 format = Format::from_str(v);
107///             },
108///             MyArgument::Level(v) => {
109///                 level = v.parse().unwrap_or(0);
110///             },
111///
112///             // If you specified @catchall-paths, unmatched OsString values
113///             // that happen to be (valid) filesystem paths will be mapped
114///             // thusly (instead of to a generic catchall).
115///             MyArgument::Path(v) => {
116///                 paths.push(PathBuf::from(v));
117///             },
118///
119///             // Unmatched String values map to the first generic catchall.
120///             MyArgument::Invalid(v) => {
121///                 eprintln!("Warning: unrecognized CLI argument {v}.");
122///             },
123///
124///             // Unmatched values with invalid UTF-8 will be passed through
125///             // to the second generic catchall as OsString values.
126///             MyArgument::InvalidUtf8(v) => {
127///                 eprintln!(
128///                     "Warning: unrecognized CLI argument {}.",
129///                     v.display(),
130///                 );
131///             },
132///         }
133///     }
134///
135///     // Now that the settings have been worked out, do something!
136///     // …
137/// }
138/// # fn print_help() {}
139/// # fn print_version() {}
140/// # enum Format { Plain, Json }
141/// # impl Format {
142/// #     fn from_str(str: String) -> Option<Self> { None }
143/// # }
144/// ```
145///
146/// ## Generated Code.
147///
148/// If you're curious or need to do something more complicated, taking a look
149/// at the generated code can be helpful.
150///
/// The call to `argue!` in the previous example, for instance, will have
/// added the following to the module:
153///
154/// ```
155/// # use std::env::ArgsOs;
156/// # use std::ffi::OsString;
157/// # use std::iter::FusedIterator;
158/// # use std::iter::Skip;
159/// #[derive(Debug, Clone, Eq, PartialEq)]
160/// /// # My Arguments Enum.
161/// ///
162/// /// If you supply documentation like this and/or like this it'll be
163/// /// attached to the generated object.
164/// pub enum MyArgument {
165///     /// # Matches "-h" "--help".
166///     Help,
167///
168///     /// # Matches "-V" "--version".
169///     Version,
170///
171///     /// # Matches "--stderr".
172///     Stderr,
173///
174///     /// # Matches "--format".
175///     Format(String),
176///
177///     /// # Matches "-l" "--level".
178///     Level(String),
179///
180///     /// # Unassociated Path Value.
181///     Path(OsString),
182///
183///     /// # Unspecified Value.
184///     Invalid(String),
185///
186///     /// # Unspecified Value (Invalid UTF-8).
187///     InvalidUtf8(OsString),
188/// }
189///
190/// impl MyArgument {
191///     /// # Environmental Argument Iterator.
192///     ///
193///     /// Return a new [`MyArgumentIter`] instance seeded with [`ArgsOs`]
194///     /// (minus the first entry corresponding to the executable path).
195///     pub fn args_os() -> MyArgumentIter<Skip<ArgsOs>> {
196/// # MyArgumentIter::new(::std::env::args_os().skip(1))
197///         // …
198///     }
199/// }
200///
201/// #[derive(Debug, Clone)]
202/// struct MyArgumentIter<T> {
203/// #    iter: T,
204/// #    done: bool,
205///         // …
206/// }
207///
208/// // Note: the generated member methods share the parent's scope. The
209/// // iterator was left private in the example, so the generated methods are
210/// // private too.
211///
212/// impl<T: Iterator<Item=OsString>> MyArgumentIter<T> {
213///     #[inline]
214///     #[must_use]
215///     /// # New Instance.
216///     ///
217///     /// Create and return a new parsing iterator over any arbitrary
218///     /// iterator of `OsString`.
219///     const fn new(src: T) -> Self {
220/// #        Self {
221/// #            iter: src,
222/// #            done: false,
223/// #        }
224///         // …
225///     }
226///
227///     #[inline]
228///     #[must_use]
229///     /// # Into Inner (Iterator).
230///     ///
231///     /// Return what's left of the inner iterator.
232///     fn into_inner(self) -> T {
233/// # self.iter
234///         // …
235///     }
236/// }
237///
238/// impl<T: Iterator<Item=OsString>> Iterator for MyArgumentIter<T> {
239///     type Item = MyArgument;
240///
241///     fn next(&mut self) -> Option<Self::Item> {
242/// # None
243///         // …
244///     }
245/// }
246///
247/// impl<T: Iterator<Item=OsString>> FusedIterator for MyArgumentIter<T> {}
248/// ```
249///
250/// ## Keyword Formatting
251///
252/// The macro supports (practically) any number of named keywords, with or without values,
253/// but there are _rules_ for the literals they match against to ensure proper
254/// parsing.
255///
256/// * Short keys — `"-k"` — must be exactly two bytes: a hyphen and an ASCII alphanumeric.
257/// * Long keys — `"--key"` — must start with two hyphens and an ASCII alphanumeric, and contain only alphanumerics, hyphens, and underscores thereafter.
258/// * Commands — `"keyword"` — must start with an ASCII alphanumeric, and contain only alphanumerics, hyphens, and underscores thereafter.
259///
260/// Format sanity is evaluated at compile-time, so issues like the following
261/// will trigger an error.
262///
263/// ```compile_fail
264/// argyle::argue! {
265///     MyArgument,
266///     MyArgumentIter,
267///
268///     Level "-level", // Not short enough.
269/// }
270/// ```
271///
272/// ```compile_fail
273/// argyle::argue! {
274///     MyArgument,
275///     MyArgumentIter,
276///
277///     Level "-❤️", // Cute, but not ASCII alphanumeric.
278/// }
279/// ```
280///
281/// ```compile_fail
282/// argyle::argue! {
283///     MyArgument,
284///     MyArgumentIter,
285///
286///     FooBar "--foo bar", // Whitespace is illegal.
287/// }
288/// ```
289///
290/// ```compile_fail
291/// argyle::argue! {
292///     MyArgument,
293///     MyArgumentIter,
294///
295///     Build "build!!!", // Settle down…
296/// }
297/// ```
298///
299/// This probably goes without saying, but keyword idents and literals must
300/// also be unique. Haha.
301///
302/// ## Parsing Particulars
303///
304/// Key/value pairs are parsed identically whether they appear consecutively
305/// — e.g. `--key` then `value` — or combined in any of the following ways:
306/// * `-kvalue`
307/// * `-k=value`
308/// * `-k = value`
309/// * `--key=value`
310/// * `--key = value`
311///
312/// Option values must, however, be valid UTF-8, otherwise the key and value
313/// will be returned as a joint `OtherOs(OsString)` in `--key=value` format.
314///
315/// Keyword matches are otherwise a case-sensitive, all-or-nothing affair.
316///
317/// Parsing will stop early if an end-of-command terminator (`"--"`) is
318/// encountered. If your program needs to handle what comes _after_, adjust
319/// the loop like so:
320///
321/// ```
322/// # argyle::argue! {};
323/// # type MyArgument = Argument;
324/// # type MyArgumentIter<T> = ArgumentIter<T>;
325/// // Save the iterator to a variable and traverse it one value at a time
326/// // to keep it in scope.
327/// let mut args = MyArgument::args_os();
328/// while let Some(arg) = args.next() {
329///     // Process as normal.
330/// }
331///
332/// // Create a second iterator instance from the remains of the first to
333/// // loop through whatever was left, if anything.
334/// for arg in MyArgumentIter::new(args.into_inner()) {
335///     // Do something.
336/// }
337/// ```
macro_rules! argue {
	// The full menu: every section — names, keywords, options, catchalls —
	// is spelled out. The two shorter arms below fill in defaults and
	// recurse back into this one, which does all the actual generation.
	(
		$( #[doc = $enum_doc:expr] )*
		$enum_vis:vis $enum:ident,

		$( #[doc = $iter_doc:expr] )*
		$iter_vis:vis $iter:ident,

		$( $key:ident $( $key_lit:literal )+, )*

		$( @options $( $keyvalue:ident $( $keyvalue_lit:literal )+, )+ )?

		$( @catchall-paths $path:ident, )?

		@catchall $other:ident $otheros:ident,
	) => (
		#[allow(dead_code, reason = "Auto-generated.")]
		#[derive(Debug, Clone, Eq, PartialEq)]
		$( #[doc = $enum_doc] )*
		$enum_vis enum $enum {
			$(
				// Note: `stringify!` preserves the literal's own quotes,
				// so no additional quoting is added around each entry.
				#[doc = ::std::concat!(
					"# Matches",
					$( " ", ::std::stringify!($key_lit), )+
					".",
				)]
				$key,
			)*
			$( $(
				#[doc = ::std::concat!(
					"# Matches",
					$( " ", ::std::stringify!($keyvalue_lit), )+
					".",
				)]
				$keyvalue(String),
			)+ )?

			$(
				/// # Unassociated Path Value.
				$path(::std::ffi::OsString),
			)?

			/// # Unspecified Value.
			$other(String),

			/// # Unspecified Value (Invalid UTF-8).
			$otheros(::std::ffi::OsString),
		}

		/// # Check Key Validity.
		///
		/// Every keyword literal is run through `check` in a const context,
		/// so a malformed literal fails the build rather than the program.
		/// The compiler should optimize this out.
		const _: () = {
			/// # Check Validity.
			///
			/// Returns `true` if `k` is a valid short key (`-k`), long key
			/// (`--key`), or command (`key`).
			const fn check(k: &str) -> bool {
				let mut k = k.as_bytes();
				match k {
					// Short key.
					[b'-', a] => return a.is_ascii_alphanumeric(),

					// Long key/Command.
					[b'-', b'-', a, rest @ ..] |
					[a, rest @ .. ] =>
						if a.is_ascii_alphanumeric() { k = rest; }
						else { return false; },

					// Dunno, but it's wrong.
					_ => return false,
				}

				// Make sure the rest is ASCII alphanumeric or - or _.
				while let [n, rest @ ..] = k {
					if ! (n.is_ascii_alphanumeric() || ::std::matches!(*n, b'-' | b'_')) {
						return false;
					}
					k = rest;
				}

				true
			}

			$($(
				::std::assert!(
					check($key_lit),
					"Invalid `argue!` keyword literal.",
				);
			)+)*
			$($($(
				::std::assert!(
					check($keyvalue_lit),
					"Invalid `argue!` option literal.",
				);
			)+)+)?
		};

		#[allow(dead_code, reason = "Auto-generated.")]
		impl $enum {
			/// # Environmental Argument Iterator.
			///
			/// Return a new
			#[doc = ::std::concat!("[`", ::std::stringify!($iter), "`]")]
			/// instance seeded with [`ArgsOs`](::std::env::ArgsOs) (minus the
			/// first entry corresponding to the executable path).
			$enum_vis fn args_os() -> $iter<::std::iter::Skip<::std::env::ArgsOs>> {
				$iter::new(::std::env::args_os().skip(1))
			}
		}

		#[derive(Debug, Clone)]
		$( #[doc = $iter_doc] )*
		$iter_vis struct $iter<T> {
			/// # Inner Iterator.
			iter: T,

			/// # Finished?
			///
			/// Set once the terminator ("--") is found or the inner
			/// iterator runs dry; nothing more is yielded afterward.
			done: bool,
		}

		#[allow(dead_code, reason = "Auto-generated.")]
		impl<T: Iterator<Item=::std::ffi::OsString>> $iter<T> {
			#[inline]
			#[must_use]
			/// # New Iterator.
			///
			/// Wrap any arbitrary iterator of `OsString` in a new parsing
			/// iterator.
			$iter_vis const fn new(src: T) -> Self {
				Self {
					iter: src,
					done: false,
				}
			}

			#[inline]
			#[must_use]
			/// # Into Inner (Iterator).
			///
			/// Return what's left of the inner iterator.
			$iter_vis fn into_inner(self) -> T { self.iter }
		}

		impl<T: Iterator<Item=::std::ffi::OsString>> Iterator for $iter<T> {
			type Item = $enum;

			fn next(&mut self) -> Option<Self::Item> {
				// Already terminated!
				if self.done { return None; }

				loop {
					// Latch `done` when the source dries up so the
					// `FusedIterator` promise holds even if the inner
					// iterator isn't fused itself.
					let Some(raw) = self.iter.next() else {
						self.done = true;
						return None;
					};

					let next = match raw.into_string() {
						Ok(next) => next,

						// We can't do anything with OsString; return as-is.
						Err(e) => {
							$(
								// Or maybe not nothing…
								if ::std::matches!(::std::fs::exists(&e), Ok(true)) {
									return Some($enum::$path(e));
								}
							)?

							return Some($enum::$otheros(e));
						},
					};

					// Skip empty values.
					if next.is_empty() { continue; }

					// If we've hit the separator, we're done. Whatever
					// follows is deliberately left unread so callers can
					// recover it via `into_inner`.
					if next == "--" {
						self.done = true;
						return None;
					}

					// Try to match a key exactly.
					match next.as_str() {
						$(
							$( $key_lit )|+ => return Some($enum::$key),
						)*
						$($(
							$( $keyvalue_lit )|+ => {
								// Options take the next entry as their
								// value; without one there's nothing to
								// return.
								let Some(value) = self.iter.next() else {
									self.done = true;
									return None;
								};

								match value.into_string() {
									Ok(s) => return Some($enum::$keyvalue(s)),
									// Build a value we can return.
									Err(e) => {
										let mut boo = ::std::ffi::OsString::from(next);
										boo.push("=");
										boo.push(e);
										return Some($enum::$otheros(boo));
									},
								}
							},
						)+)?
						_ => {},
					}

					// Try to match a key-and-value.
					$(
						// Try to match a --key=value.
						if next.starts_with("--") {
							if let Some((a, b)) = next.split_once('=') {
								match a.trim_ascii_end() {
									$(
										$( $keyvalue_lit )|+ => return Some(
											$enum::$keyvalue(b.trim().to_owned())
										),
									)+
									_ => {},
								}
							}
						}

						// Try to match a -kValue.
						else if next.starts_with('-') && let Some((a, b)) = next.split_at_checked(2) {
							match a {
								$(
									$( $keyvalue_lit )|+ => {
										// Drop the optional "=" along with
										// any padding around it.
										let mut b = b.trim_ascii();
										if let Some(rest) = b.strip_prefix('=') {
											b = rest.trim_ascii_start();
										}

										return Some($enum::$keyvalue(b.to_owned()));
									},
								)+
								_ => {},
							}
						}
					)?

					$(
						// Maybe it's a path?
						if ::std::matches!(::std::fs::exists(&next), Ok(true)) {
							return Some($enum::$path(::std::ffi::OsString::from(next)));
						}
					)?

					// Who knows?
					return Some($enum::$other(next));
				}
			}

			fn size_hint(&self) -> (usize, Option<usize>) {
				if self.done { (0, Some(0)) }
				else {
					// Each yielded argument consumes at least one source
					// entry, so the source's upper bound is ours too. The
					// lower bound is zero because empty values are skipped
					// and a terminator can end things at any time.
					let (_, upper) = self.iter.size_hint();
					(0, upper)
				}
			}
		}

		impl<T: Iterator<Item=::std::ffi::OsString>> ::std::iter::FusedIterator for $iter<T> {}
	);

	// Same as above, but without @catchall overrides.
	(
		$( #[doc = $enum_doc:expr] )*
		$enum_vis:vis $enum:ident,

		$( #[doc = $iter_doc:expr] )*
		$iter_vis:vis $iter:ident,

		$( $key:ident $( $key_lit:literal )+, )*

		$( @options $( $keyvalue:ident $( $keyvalue_lit:literal )+, )+ )?

		$( @catchall-paths $path:ident, )?
	) => (
		// Recurse with the default catchall names filled out.
		$crate::argue! {
			$( #[doc = $enum_doc] )*
			$enum_vis $enum,

			$( #[doc = $iter_doc] )*
			$iter_vis $iter,

			$( $key $( $key_lit )+, )*

			$( @options $( $keyvalue $( $keyvalue_lit )+, )+ )?

			$( @catchall-paths $path, )?

			@catchall Other OtherOs,
		}
	);

	// Same as above, but without the enum/iterator overrides.
	(
		$( $key:ident $( $key_lit:literal )+, )*

		$( @options $( $keyvalue:ident $( $keyvalue_lit:literal )+, )+ )?

		$( @catchall-paths $path:ident, )?

		$( @catchall $other:ident $otheros:ident, )?
	) => (
		// Recurse with the default enum/iterator names filled out. If the
		// catchalls were omitted too, the arm above supplies those.
		$crate::argue! {
			/// # CLI Arguments.
			Argument,

			/// # CLI Argument Iterator.
			ArgumentIter,

			$( $key $( $key_lit )+, )*

			$( @options $( $keyvalue $( $keyvalue_lit )+, )+ )?

			$( @catchall-paths $path, )?

			$( @catchall $other $otheros, )?
		}
	);
}
649
650
651
#[cfg(test)]
mod tests {
	use std::ffi::OsString as Os;

	#[test]
	/// # Keywords, Options, Paths, Catchalls, and the Terminator.
	///
	/// Feed one of everything through a generated iterator and make sure
	/// each entry comes back as the expected variant, in order.
	fn t_argue() {
		argue!{
			/// # My Arguments.
			///
			/// Why argue?
			pub(crate) MyArgument,

			/// # My Argument Iterator.
			///
			/// Why argue?
			pub(crate) MyArgumentIter,

			Help    "-h" "--help"    "help",
			Version "-V" "--version" "version",

			@options
			Output  "-o" "--output",

			@catchall-paths Path,

			// Stick with the default Other/OtherOs catchalls.
		}

		// The raw arguments, grouped by what they should parse into.
		let args: Vec<Os> = [
			"-h", "--help", "help",
			"-V", "--version", "version",
			"-o", "/path/to/foo",
			"--output", "/path/to/foo",
			"Cargo.toml", "Dunno",
			"--", "a", "b",
		]
			.into_iter()
			.map(Os::from)
			.collect();

		// What the parser should make of them, up to the terminator.
		let expected = [
			MyArgument::Help,
			MyArgument::Help,
			MyArgument::Help,
			MyArgument::Version,
			MyArgument::Version,
			MyArgument::Version,
			MyArgument::Output("/path/to/foo".to_owned()),
			MyArgument::Output("/path/to/foo".to_owned()),
			MyArgument::Path(Os::from("Cargo.toml")),
			MyArgument::Other("Dunno".to_owned()),
		];

		let mut parsed = MyArgumentIter::new(args.into_iter());
		for want in expected {
			assert_eq!(parsed.next(), Some(want));
		}

		// The terminator ends parsing…
		assert!(parsed.next().is_none());

		// …but the two trailing entries should still be waiting inside the
		// source iterator.
		let mut leftovers = parsed.into_inner();
		for want in ["a", "b"] {
			assert_eq!(leftovers.next(), Some(Os::from(want)));
		}

		// Now we should be done for real.
		assert!(leftovers.next().is_none());
	}

	#[test]
	/// # Key/Value Formats.
	///
	/// Every supported way of writing a key/value pair should yield the
	/// same option variant and value.
	fn t_kv() {
		argue! {
			@options Key "-k" "--key",
		}

		// Seven pairs in total; two of them span two entries apiece.
		let raw = [
			"-kValue",
			"-k=Value",
			"-k = Value",
			"-k", "Value",
			"--key=Value",
			"--key = Value",
			"--key", "Value",
		];

		let mut parsed = ArgumentIter::new(raw.map(Os::from).into_iter());
		for _ in 0..7 {
			assert_eq!(parsed.next(), Some(Argument::Key("Value".to_owned())));
		}
		assert!(parsed.next().is_none());
	}
}
757}