1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
#![allow(clippy::tabs_in_doc_comments)] // the doc examples in this file are deliberately indented with tabs

//! Adds a newtype wrapper ([`OsArgument`]) around [`OsStr`] that allows it to
//! be parsed by [`getargs::Options`].
//!
//! In combination with the [`argv`](https://docs.rs/argv) crate, this allows
//! for lowest-cost argument parsing across all platforms (zero-cost on Linux).
//!
//! This is a separate crate from `getargs` because it requires (wildly) unsafe
//! code. `std` does not want us messing with [`OsStr`]s at all!
//!
//! ## Usage
//!
//! First, obtain an iterator over [`OsStr`]s somehow - I recommend
//! [`argv`](https://docs.rs/argv) once again - then wrap them in [`OsArgument`]
//! and pass that to [`Options::new`][getargs::Options::new].
//!
//! ```compile_only
//! # fn main() {
//! use getargs::Options;
//! use getargs_os::OsArgument;
//!
//! let mut opts = Options::new(argv::iter().skip(1).map(<&OsArgument>::from));
//! # }
//! ```
//!
//! Then use [`Options`][getargs::Options] as normal - check its documentation
//! for more usage examples.
//!
//! You can use the [`os!`] macro to create new OS strings to compare arguments
//! against. This macro works on all operating systems. For example:
//!
//! ```compile_only
//! # fn main() {
//! # use getargs::{Options, Arg};
//! # use getargs_os::{os, OsArgument};
//! # let mut opts = Options::new(argv::iter().skip(1).map(<&OsArgument>::from));
//! while let Some(arg) = opts.next_arg().expect("some ooga booga just happened") {
//! 	if arg == Arg::Long(os!("help")) {
//! 		// print help...
//! 	} else {
//! 		// ...
//! 	}
//! }
//! # }
//! ```
//!

use std::ffi::OsStr;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::{Deref, DerefMut};
use getargs::Argument;

mod utf8_bs;

#[cfg(test)]
mod test;

/// A newtype wrapper around [`OsStr`] that allows it to be parsed by
/// [`Options`][getargs::Options].
///
/// The wrapper is `#[repr(transparent)]`, so `&OsStr` and `&OsArgument` share
/// a single layout; the `From` implementations below convert between the two
/// reference types without copying.
///
/// The short option type for this [`Argument`] implementation is *UTF-8
/// codepoints*; however they may not all be valid `char`s.
#[repr(transparent)]
pub struct OsArgument(pub OsStr);

/// Reinterprets a borrowed [`OsStr`] as a borrowed [`OsArgument`].
impl<'s> From<&'s OsStr> for &'s OsArgument {
	fn from(os_str: &'s OsStr) -> Self {
		// SAFETY: `OsArgument` is `repr(transparent)` over `OsStr`, so both
		// pointee types have the same layout and the same pointer metadata.
		// A pointer cast is used rather than `transmute` so the compiler still
		// checks that the two fat pointers are of a compatible kind.
		unsafe { &*(os_str as *const OsStr as *const OsArgument) }
	}
}

/// Borrows the [`OsStr`] wrapped by an [`OsArgument`].
impl<'s> From<&'s OsArgument> for &'s OsStr {
	fn from(argument: &'s OsArgument) -> Self {
		// The field is public, so a plain borrow suffices; no `unsafe` needed.
		&argument.0
	}
}

impl OsArgument {
	/// Views the wrapped [`OsStr`] as a plain byte slice.
	///
	/// On non-Windows targets this goes through the Unix `OsStrExt` API. On
	/// Windows it reinterprets the reference directly, which depends on how
	/// `std` happens to lay out `OsStr` there.
	fn as_bytes(&self) -> &[u8] {
		#[cfg(windows)] {
			// SAFETY: This relies on `OsStr` being represented as a byte slice
			// on Windows. `std` does not promise that and keeps `OsStr`
			// opaque, so this is not future-proof.
			unsafe { std::mem::transmute::<&OsStr, &[u8]>(&self.0) }
		}

		#[cfg(not(windows))] {
			// On Unix an `OsStr` is just bytes, and `std` says so publicly.
			use std::os::unix::ffi::OsStrExt;
			OsStrExt::as_bytes(&self.0)
		}
	}

	/// Builds a borrowed [`OsArgument`] over `bytes`; the inverse of
	/// [`as_bytes`](Self::as_bytes).
	///
	/// On Windows the bytes are reinterpreted without any validation, so they
	/// must already be in whatever form `OsStr` stores internally.
	fn from_bytes(bytes: &[u8]) -> &Self {
		#[cfg(windows)] {
			// SAFETY: Same representation assumption as in `as_bytes`, plus
			// `OsArgument` being `repr(transparent)` over `OsStr`.
			unsafe { std::mem::transmute::<&[u8], &Self>(bytes) }
		}

		#[cfg(not(windows))] {
			// On Unix any byte slice is a valid `OsStr`.
			use std::os::unix::ffi::OsStrExt;
			let os_str: &OsStr = OsStrExt::from_bytes(bytes);
			<&Self as From<&OsStr>>::from(os_str)
		}
	}
}

/// Lets every `&self` method of [`OsStr`] be called directly on an
/// [`OsArgument`].
impl Deref for OsArgument {
	type Target = OsStr;

	fn deref(&self) -> &OsStr {
		let inner: &OsStr = &self.0;
		inner
	}
}

/// Gives mutable access to the wrapped [`OsStr`].
impl DerefMut for OsArgument {
	fn deref_mut(&mut self) -> &mut OsStr {
		let inner: &mut OsStr = &mut self.0;
		inner
	}
}

impl PartialEq for OsArgument {
	fn eq(&self, other: &Self) -> bool { self.0 == other.0 }
}

impl Eq for OsArgument {}

/// Formats exactly like the wrapped [`OsStr`]; the newtype adds no decoration.
impl Debug for OsArgument {
	fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
		Debug::fmt(&self.0, f)
	}
}

/// Hashes exactly like the wrapped [`OsStr`], keeping `Hash` in step with the
/// delegated `Eq` implementation.
impl Hash for OsArgument {
	fn hash<H: Hasher>(&self, state: &mut H) {
		Hash::hash(&self.0, state);
	}
}

/// A single short option as seen by [`OsArgument`]: either a decoded Unicode
/// codepoint or one raw byte that could not be decoded.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ShortOpt {
	/// A Unicode codepoint. On Windows every short option is reported this
	/// way, although the value may not be a valid `char` (for example an
	/// unpaired surrogate).
	Codepoint(u32),

	/// A single arbitrary byte, produced when the [`OsStr`] is not valid
	/// Unicode at that position. This never happens on Windows but can on Unix
	/// or Linux systems.
	Byte(u8)
}

/// A `char` is always a valid codepoint, so this conversion is lossless.
impl From<char> for ShortOpt {
	fn from(codepoint: char) -> Self {
		ShortOpt::Codepoint(u32::from(codepoint))
	}
}

/// Treats the integer as a codepoint value; no validity check is performed.
impl From<u32> for ShortOpt {
	fn from(codepoint: u32) -> Self {
		ShortOpt::Codepoint(codepoint)
	}
}

/// Treats the value as a raw byte, *not* as an ASCII codepoint.
impl From<u8> for ShortOpt {
	fn from(byte: u8) -> Self {
		ShortOpt::Byte(byte)
	}
}

/// Lets [`getargs`] parse borrowed [`OsArgument`]s by delegating to its
/// byte-slice implementation and re-wrapping the resulting slices.
impl Argument for &'_ OsArgument {
	type ShortOpt = ShortOpt;

	/// Returns `true` only for the exact argument `--`.
	#[inline]
	fn ends_opts(self) -> bool {
		matches!(self.as_bytes(), b"--")
	}

	/// Delegates long-option parsing to the `&[u8]` implementation and wraps
	/// the name (and value, if any) back into [`OsArgument`]s.
	#[inline]
	fn parse_long_opt(self) -> Option<(Self, Option<Self>)> {
		// WTF-8 makes this fine (this is in hideous implementation-detail land)
		let (name, value) = self.as_bytes().parse_long_opt()?;
		Some((OsArgument::from_bytes(name), value.map(OsArgument::from_bytes)))
	}

	/// Delegates short-cluster detection to the `&[u8]` implementation and
	/// wraps the cluster back into an [`OsArgument`].
	#[inline]
	fn parse_short_cluster(self) -> Option<Self> {
		// WTF-8 makes this fine again!
		let cluster = self.as_bytes().parse_short_cluster()?;
		Some(OsArgument::from_bytes(cluster))
	}

	/// Splits the first short option off the cluster.
	///
	/// Returns the option together with whatever follows it, or `None` when
	/// nothing is left. On Windows the option is always a
	/// [`ShortOpt::Codepoint`]; elsewhere it is a codepoint when the leading
	/// bytes are valid UTF-8 and a single [`ShortOpt::Byte`] otherwise.
	#[cfg_attr(not(windows), inline)] // the Windows path pulls in a codepoint parser, so it is not hinted for inlining
	fn consume_short_opt(self) -> (Self::ShortOpt, Option<Self>) {
		#[cfg(windows)] {
			// This is horrible and relies on WTF-8 again!
			let mut remaining = self.as_bytes().iter();

			// SAFETY: assumes the cluster is non-empty, so at least one
			// codepoint can be read — TODO confirm that `getargs` never calls
			// this on an empty argument.
			let codepoint = unsafe { utf8_bs::next_code_point(&mut remaining).unwrap_unchecked() };

			let tail = remaining.as_slice();
			let rest = if tail.is_empty() { None } else { Some(OsArgument::from_bytes(tail)) };
			(ShortOpt::Codepoint(codepoint), rest)
		}

		#[cfg(not(windows))] {
			let bytes = self.as_bytes();

			// SAFETY: assumes the cluster is non-empty — TODO confirm that
			// `getargs` never calls this on an empty argument.
			let lead = unsafe { *bytes.get_unchecked(0) };
			let width = utf8_bs::utf8_char_width(lead);

			// Optimistically decode one UTF-8 sequence of the advertised width.
			// This yields `None` if the input is too short, the bytes are not
			// valid UTF-8, or the width is zero.
			let decoded = bytes
				.get(..width)
				.and_then(|prefix| std::str::from_utf8(prefix).ok())
				.and_then(|text| text.chars().next());

			let (opt, tail) = match decoded {
				// SAFETY: `bytes.get(..width)` succeeded, so `width <= bytes.len()`.
				Some(ch) => (ShortOpt::Codepoint(ch as u32), unsafe { bytes.get_unchecked(width..) }),
				// Fall back to one byte at a time if UTF-8 parsing fails.
				// SAFETY: same non-empty assumption as for `lead` above.
				None => (ShortOpt::Byte(lead), unsafe { bytes.get_unchecked(1..) }),
			};

			let rest = if tail.is_empty() { None } else { Some(OsArgument::from_bytes(tail)) };
			(opt, rest)
		}
	}

	/// The remainder of a cluster is used as the option's value unchanged.
	#[inline]
	fn consume_short_val(self) -> Self {
		self
	}
}

/// Creates a [`&OsArgument`](OsArgument) from a string literal (`"whatever"`).
///
/// A string literal is always valid UTF-8, so the conversion goes through the
/// safe [`OsStr::new`] and needs no `unsafe` code on any platform. Passing
/// anything other than a `str` literal is a compile error.
///
/// For an unsafe version of this macro that permits invalid UTF-8, see
/// [`osb!`]. Note that [`osb!`] causes immediate Undefined Behavior with
/// invalid UTF-8 on Windows.
#[macro_export]
macro_rules! os {
	($string:literal) => {
		<&$crate::OsArgument as ::std::convert::From<&::std::ffi::OsStr>>::from(
			// Absolute paths keep the expansion independent of the call site's scope.
			::std::ffi::OsStr::new::<str>($string)
		)
	};
}

/// Creates a [`&OsArgument`](OsArgument) from a literal byte string
/// (`b"whatever"`).
///
/// # Safety
///
/// The expansion is a bare `transmute` call, so the macro must be invoked
/// inside an `unsafe` block. Creating an [`OsStr`] from invalid UTF-8 is
/// Undefined Behavior on Windows (but not Unix or Linux), so on Windows the
/// bytes must be valid UTF-8.
#[macro_export]
macro_rules! osb {
	($bytes:literal) => {
		<&$crate::OsArgument as ::std::convert::From<&::std::ffi::OsStr>>::from(
			// Absolute paths keep the expansion working even if the call site
			// has its own item named `std` in scope.
			::std::mem::transmute::<&[u8], &::std::ffi::OsStr>($bytes as &[u8])
		)
	};
}