getargs_os/
lib.rs

1#![cfg_attr(feature = "os_str_bytes", feature(os_str_bytes))]
2
#![allow(clippy::tabs_in_doc_comments)] // the doc examples in this file are deliberately indented with tabs
4
5//! Adds a newtype wrapper ([`OsArgument`]) around [`OsStr`] that allows it to
6//! be parsed by [`getargs::Options`].
7//!
8//! In combination with the [`argv`](https://docs.rs/argv) crate, this allows
9//! for lowest-cost argument parsing across all platforms (zero-cost on Linux).
10//!
11//! This is a separate crate from `getargs` because it requires (wildly) unsafe
12//! code. `std` does not want us messing with [`OsStr`]s at all!
13//!
14//! ## Usage
15//!
16//! First, obtain an iterator over [`OsStr`]s somehow - I recommend
17//! [`argv`](https://docs.rs/argv) once again - then wrap them in [`OsArgument`]
18//! and pass that to [`Options::new`][getargs::Options::new].
19//!
20//! ```compile_only
21//! # fn main() {
22//! use getargs::Options;
23//! use getargs_os::OsArgument;
24//!
25//! let mut opts = Options::new(argv::iter().skip(1).map(<&OsArgument>::from));
26//! # }
27//! ```
28//!
29//! Then use [`Options`][getargs::Options] as normal - check its documentation
30//! for more usage examples.
31//!
32//! You can use the [`os!`] macro to create new OS strings to compare arguments
33//! against. This macro works on all operating systems. For example:
34//!
35//! ```compile_only
36//! # fn main() {
37//! # use getargs::{Options, Arg};
38//! # use getargs_os::{os, OsArgument};
39//! # let mut opts = Options::new(argv::iter().skip(1).map(<&OsArgument>::from));
40//! while let Some(arg) = opts.next_arg().expect("some ooga booga just happened") {
41//! 	if arg == Arg::Long(os!("help")) {
42//! 		// print help...
43//! 	} else {
44//! 		// ...
45//! 	}
46//! }
47//! # }
48//! ```
49//!
50//! ### `os_str_bytes` feature
51//!
52//! To unlock `From<&str>` and `PartialEq<&str>` impls for `&OsArgument`, you
53//! must enable the unstable `os_str_bytes` feature, which depends on Nightly.
54//! This is because earlier versions of Rust didn't provide guarantees that OS
55//! strings are a superset of UTF-8 (even though `getargs-os` relied on this
56//! anyway in the past). Since the feature now exists, I don't want to make
57//! `getargs-os` unconditionally require Nightly, but new features relying on
58//! this guarantee will be gated behind the `os_str_bytes` feature until it is
59//! stabilized.
60
61use std::ffi::OsStr;
62use std::fmt::{Debug, Formatter};
63use std::hash::{Hash, Hasher};
64use std::ops::{Deref, DerefMut};
65use getargs::Argument;
66
67mod utf8_bs;
68
69#[cfg(test)]
70mod test;
71
/// A newtype wrapper around [`OsStr`] that allows it to be parsed by
/// [`Options`][getargs::Options].
///
/// Short options are reported as [`ShortOpt`] values: decoded codepoints
/// where possible, which are not all guaranteed to be valid `char`s.
///
/// The wrapper is `#[repr(transparent)]`, so `&OsStr` and `&OsArgument` can be
/// converted into each other without copying.
#[repr(transparent)]
pub struct OsArgument(pub OsStr);

impl<'a> From<&'a OsStr> for &'a OsArgument {
	/// Reinterprets a borrowed [`OsStr`] as a borrowed [`OsArgument`].
	fn from(from: &'a OsStr) -> Self {
		// A typed pointer cast is used instead of `mem::transmute` so that the
		// source and destination types are spelled out and checked.
		//
		// SAFETY: `OsArgument` is `repr(transparent)` over `OsStr`, so both
		// pointers share layout and metadata, and the pointee stays borrowed
		// for `'a`.
		unsafe { &*(from as *const OsStr as *const OsArgument) }
	}
}

impl<'a> From<&'a OsArgument> for &'a OsStr {
	/// Borrows the wrapped [`OsStr`].
	fn from(from: &'a OsArgument) -> Self {
		// The field is public, so no unsafe reinterpretation is needed here.
		&from.0
	}
}
93
#[cfg(feature = "os_str_bytes")]
impl<'a> From<&'a str> for &'a OsArgument {
	/// Borrows a UTF-8 string as an [`OsArgument`] without copying.
	///
	/// The lifetime is named explicitly: with elided lifetimes in the impl
	/// header, the input and output borrows are unrelated and the returned
	/// reference cannot be tied to `from`.
	fn from(from: &'a str) -> Self {
		// `OsStr::new` is the safe `&str` -> `&OsStr` borrow provided by std,
		// so no unchecked byte-level constructor is required.
		<Self as From<&'a OsStr>>::from(OsStr::new(from))
	}
}
100
101impl OsArgument {
102	fn as_bytes(&self) -> &[u8] {
103		#[cfg(windows)]
104		// SAFETY: This relies on representation! This is not future-proof!
105		// But there is no other way to do this, OsStr is completely opaque!
106		// `std` tries very hard to hide the contents from us!
107		unsafe { std::mem::transmute(&self.0) }
108
109		#[cfg(not(windows))]
110		// Unix is awesome and `OsStr`s are just byte arrays
111		std::os::unix::ffi::OsStrExt::as_bytes(&self.0)
112	}
113
114	fn from_bytes(bytes: &[u8]) -> &Self {
115		#[cfg(windows)]
116		// SAFETY: Ditto above!
117		unsafe { std::mem::transmute(bytes) }
118
119		#[cfg(not(windows))]
120		// Unix is awesome and `OsStr`s are just byte arrays
121		<&Self as From<&OsStr>>::from(std::os::unix::ffi::OsStrExt::from_bytes(bytes))
122	}
123}
124
125impl Deref for OsArgument {
126	type Target = OsStr;
127
128	fn deref(&self) -> &Self::Target { &self.0 }
129}
130
131impl DerefMut for OsArgument {
132	fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 }
133}
134
135impl PartialEq for OsArgument {
136	fn eq(&self, other: &Self) -> bool { self.0 == other.0 }
137}
138
#[cfg(feature = "os_str_bytes")]
impl PartialEq<&str> for &OsArgument {
	/// Compares the argument against a UTF-8 string.
	fn eq(&self, other: &&str) -> bool {
		// `PartialEq::eq` takes `&Rhs`, which is `&&str` for this impl. Both
		// sides are peeled down to `OsStr` / `str` so that std's
		// `OsStr: PartialEq<str>` performs the comparison.
		self.0 == **other
	}
}
145
#[cfg(feature = "os_str_bytes")]
impl PartialEq<&OsArgument> for &str {
	/// Compares a UTF-8 string against an argument.
	fn eq(&self, other: &&OsArgument) -> bool {
		// `PartialEq::eq` takes `&Rhs`, which is `&&OsArgument` for this impl.
		// Both sides are peeled down to `str` / `OsStr` so that std's
		// `str: PartialEq<OsStr>` performs the comparison.
		**self == other.0
	}
}
152
153impl Eq for OsArgument {}
154
155impl Debug for OsArgument {
156	fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { self.0.fmt(f) }
157}
158
159impl Hash for OsArgument {
160	fn hash<H: Hasher>(&self, state: &mut H) { self.0.hash(state) }
161}
162
/// A single short option as produced by [`OsArgument`]: either a Unicode
/// codepoint or an arbitrary byte.
#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
pub enum ShortOpt {
	/// A Unicode codepoint. On Windows, short options will always be valid
	/// codepoints (but may be invalid characters, such as unpaired surrogates).
	Codepoint(u32),

	/// An arbitrary byte, which can happen if the [`OsStr`] is invalid Unicode.
	/// Windows always has valid codepoints, but this may be encountered on Unix
	/// or Linux systems.
	Byte(u8)
}

impl From<char> for ShortOpt {
	/// Wraps a `char` as its scalar value.
	fn from(codepoint: char) -> Self {
		ShortOpt::Codepoint(u32::from(codepoint))
	}
}

impl From<u32> for ShortOpt {
	/// Wraps a raw codepoint value as-is; it is not range-checked.
	fn from(codepoint: u32) -> Self {
		ShortOpt::Codepoint(codepoint)
	}
}

impl From<u8> for ShortOpt {
	/// Wraps a single raw byte.
	fn from(byte: u8) -> Self {
		ShortOpt::Byte(byte)
	}
}
194
195impl Argument for &'_ OsArgument {
196	type ShortOpt = ShortOpt;
197
198	#[inline]
199	fn ends_opts(self) -> bool {
200		self.as_bytes() == b"--"
201	}
202
203	#[inline]
204	fn parse_long_opt(self) -> Option<(Self, Option<Self>)> {
205		// WTF-8 makes this fine (this is in hideous implementation-detail land)
206		self.as_bytes().parse_long_opt().map(|(name, value)| (OsArgument::from_bytes(name), value.map(OsArgument::from_bytes)))
207	}
208
209	#[inline]
210	fn parse_short_cluster(self) -> Option<Self> {
211		// WTF-8 makes this fine again!
212		self.as_bytes().parse_short_cluster().map(OsArgument::from_bytes)
213	}
214
215	#[cfg_attr(not(windows), inline)] // UTF-8/WTF-8 codepoint parser included, it big!
216	fn consume_short_opt(self) -> (Self::ShortOpt, Option<Self>) {
217		#[cfg(windows)] {
218			// This is horrible and relies on WTF-8 again!
219			let mut iter = self.as_bytes().iter();
220			let codepoint = unsafe { utf8_bs::next_code_point(&mut iter).unwrap_unchecked() };
221			(ShortOpt::Codepoint(codepoint), Some(iter.as_slice()).filter(|&slice| !slice.is_empty()).map(OsArgument::from_bytes))
222		}
223
224		#[cfg(not(windows))] {
225			let bytes = self.as_bytes();
226
227			// Optimistically try to parse as UTF-8!
228			let first = unsafe { *bytes.get_unchecked(0) };
229			let encoded_length = utf8_bs::utf8_char_width(first);
230
231			let (codepoint, rest) = if let Some(Ok(Some(char))) = bytes.get(0..encoded_length).map(|slice| std::str::from_utf8(slice).map(|str| str.chars().next())) {
232				// SAFETY: We know all of `encoded_length` exists!
233				(ShortOpt::Codepoint(char as u32), unsafe { bytes.get_unchecked(encoded_length..) })
234			} else {
235				// Fall back to one byte at a time if UTF-8 parsing fails!
236				(ShortOpt::Byte(first), unsafe { bytes.get_unchecked(1..) })
237			};
238
239			(codepoint, Some(OsArgument::from_bytes(rest)).filter(|s| !s.is_empty()))
240		}
241	}
242
243	#[inline]
244	fn consume_short_val(self) -> Self {
245		self
246	}
247}
248
/// Creates an OS string (as a `&'static OsArgument`) from a literal string
/// (`"whatever"`).
///
/// The expansion is entirely safe: the literal is borrowed through
/// `OsStr::new` and then wrapped.
///
/// For an unsafe version of this macro that permits invalid UTF-8, see [`osb`].
/// Note that [`osb`] causes immediate Undefined Behavior with invalid UTF-8 on
/// Windows.
#[macro_export]
macro_rules! os {
	($string:literal) => {
		<&$crate::OsArgument as ::std::convert::From<&::std::ffi::OsStr>>::from(
			// `as &str` rejects non-string literals at compile time.
			::std::ffi::OsStr::new($string as &str),
		)
	};
}
258
/// Creates an OS string (as a `&OsArgument`) from a literal byte string
/// (`b"whatever"`).
///
/// This macro is **unsafe**, and its expansion must be placed inside an
/// `unsafe` block, because creating an [`OsStr`] from invalid UTF-8 is
/// Undefined Behavior on Windows (but not Unix or Linux).
#[macro_export]
macro_rules! osb {
	($bytes:literal) => {
		<&$crate::OsArgument as ::std::convert::From<&::std::ffi::OsStr>>::from(
			// The bytes are reinterpreted in place; nothing is validated.
			::std::mem::transmute::<&[u8], &::std::ffi::OsStr>($bytes as &[u8]),
		)
	};
}