//! The [`RegexRepresentation`] trait and its implementations for primitive and standard-library types.
use const_format::formatcp;
/// A Trait used by `sscanf` to obtain the Regex of a Type
///
/// Has one associated Constant: `REGEX`, which should be set to a regular Expression.
/// Implement this trait for a Type that you want to be parsed using sscanf.
///
/// The Regular Expression should match the string representation as exactly as possible.
/// Any incorrect matches might be caught in the `from_str` parsing, but that might cause this
/// regex to take characters that could have been matched by other placeholders, leading to
/// unexpected parsing failures.
///
/// ## Implementing the Trait
///
/// A manual implementation of this trait is only necessary if you
/// - want to use a custom Type that is not supported by default **AND**
/// - cannot use [`#[derive(FromScanf)]`](derive.FromScanf.html) on your Type
///
/// Deriving [`FromScanf`](crate::FromScanf) will automatically implement this trait for your Type,
/// and should be preferred in most cases.
///
/// If you do need to implement this trait yourself, note the following:
/// - The regex cannot contain any capture groups (round brackets). If you need to use `( )` in your
///   regex, use `(?: )` instead to make it non-capturing.
/// - Using a raw string literal (`r"..."`) is recommended to avoid having to escape backslashes.
/// - The [`const_format`] crate can be used to combine multiple
///   strings into one, which is useful for complex regexes. This can also be used to combine the
///   existing regex implementation of other types. `sscanf` internally uses `const_format` as well,
///   so a version of it is re-exported under `sscanf::const_format`.
///
/// ## Example
/// Let's say we want to add a Fraction parser
/// ```
/// struct Fraction(isize, usize);
/// ```
/// Which can be obtained from any string of the kind `±X/Y` or just `X`
/// ```
/// # struct Fraction(isize, usize);
/// impl sscanf::RegexRepresentation for Fraction {
///     /// matches an optional '-' or '+' followed by a number.
///     /// possibly with a '/' and another Number
///     const REGEX: &'static str = r"[-+]?\d+(?:/\d+)?";
///     //                                     ^^ makes the group non-capturing. Has to be used on any ( ) in a regex.
///
///     // alternatively, we could use const_format to reuse existing regexes:
///     // REGEX = const_format::concatcp!(isize::REGEX, "(?:", "/", usize::REGEX, ")?");
/// }
/// impl std::str::FromStr for Fraction {
///     type Err = std::num::ParseIntError;
///     fn from_str(s: &str) -> Result<Self, Self::Err> {
///         let mut iter = s.split('/');
///         let num = iter.next().unwrap().parse()?;
///         let mut denom = 1;
///         if let Some(d) = iter.next() {
///             denom = d.parse()?;
///         };
///         Ok(Fraction(num, denom))
///     }
/// }
/// ```
/// Now we can use this `Fraction` struct in `sscanf`:
/// ```
/// # #[derive(Debug, PartialEq)]
/// # struct Fraction(isize, usize);
/// # impl sscanf::RegexRepresentation for Fraction {
/// #     const REGEX: &'static str = r"[-+]?\d+(?:/\d+)?";
/// # }
/// # impl std::str::FromStr for Fraction {
/// #     type Err = std::num::ParseIntError;
/// #     fn from_str(s: &str) -> Result<Self, Self::Err> {
/// #         let mut iter = s.split('/');
/// #         let num = iter.next().unwrap().parse()?;
/// #         let denom = iter.next().map(|d| d.parse()).transpose()?.unwrap_or(1);
/// #         Ok(Fraction(num, denom))
/// #     }
/// # }
/// # use sscanf::sscanf;
///
/// let output = sscanf!("2/5", "{}", Fraction);
/// assert_eq!(output.unwrap(), Fraction(2, 5));
///
/// let output = sscanf!("-25/3", "{}", Fraction);
/// assert_eq!(output.unwrap(), Fraction(-25, 3));
///
/// let output = sscanf!("8", "{}", Fraction);
/// assert_eq!(output.unwrap(), Fraction(8, 1));
///
/// let output = sscanf!("6e/3", "{}", Fraction);
/// assert!(output.is_err());
///
/// let output = sscanf!("6/-3", "{}", Fraction);
/// assert!(output.is_err()); // only first number can be negative
///
/// let output = sscanf!("6/3", "{}", Fraction);
/// assert_eq!(output.unwrap(), Fraction(6, 3));
/// ```
pub trait RegexRepresentation {
/// A regular Expression that exactly matches any String representation of the implementing Type.
///
/// As described on the trait, the expression must not contain capture groups; use `(?: )` for
/// grouping instead.
const REGEX: &'static str;
}
// Building blocks of the float regex. They mirror the grammar documented for `f32::from_str`:
// https://doc.rust-lang.org/std/primitive.f32.html#grammar
//
// The pieces are listed leaf-first; `formatcp!` splices the referenced constants into each
// pattern at compile time.

// Digit ::= [0-9]
// NOTE(review): `\d` is Unicode-aware in the `regex` crate by default, so it may accept more
// than the ASCII digits named by the grammar -- confirm whether that is intended.
const DIGIT: &str = r"\d";

// Sign ::= [+-]
const SIGN: &str = r"[+-]";

// Exp ::= 'e' Sign? Digit+
const EXP: &str = formatcp!(r"e{SIGN}?{DIGIT}+");

// Number ::= ( Digit+ | Digit+ '.' Digit* | Digit* '.' Digit+ ) Exp?
const NUMBER: &str = formatcp!(r"(?:{DIGIT}+|{DIGIT}+\.{DIGIT}*|{DIGIT}*\.{DIGIT}+)(?:{EXP})?");

// Float ::= Sign? ( 'inf' | 'infinity' | 'nan' | Number )
// Everything after the sign sits inside a case-insensitive, non-capturing `(?i: )` group.
const FLOAT: &str = formatcp!(r"{SIGN}?(?i:inf|infinity|nan|{NUMBER})");
/// Emits `$item` with one `#[doc = ...]` attribute per expression in `$line`.
///
/// Each expression becomes its own documentation line, in the order given. This allows doc
/// text to be assembled from macro fragments such as `concat!` or `stringify!`.
///
/// Invocation shape: `doc_concat!{ <item>, <doc expr>, <doc expr>, ... }` (at least one doc
/// expression, no trailing comma).
macro_rules! doc_concat {
    ($item:item, $($line:expr),+) => {
        $(#[doc = $line])+
        $item
    };
}
/// Implements `RegexRepresentation` for number types and generates the matching documentation
/// (including a doc-test asserting the exact regex) through `doc_concat!`.
///
/// Integer form: `impl_num!(<description>, <regex prefix>; (<type>, <n>[, <doc digits>]), ...)`.
/// The resulting regex is `<regex prefix><n>}`, so the prefix is expected to end inside an open
/// `{min,` quantifier. `<doc digits>` is the digit count shown in the docs and defaults to `<n>`.
///
/// Float form: `impl_num!(f64; <type>, ...)` assigns the shared `FLOAT` regex to every type.
macro_rules! impl_num {
    // Integer types whose documented digit count equals the quantifier bound: forward to the
    // three-element form with `$n` used for both.
    ($spec: literal, $prefix: literal; $(($ty: ty, $n: literal)),+) => {
        impl_num!($spec, $prefix; $(($ty, $n, $n)),+);
    };
    // Integer types with an explicit digit count for the documentation.
    ($spec: literal, $prefix: literal; $(($ty: ty, $n: literal, $doc: literal)),+) => {
        $(impl RegexRepresentation for $ty {
            doc_concat!{
                const REGEX: &'static str = concat!($prefix, $n, "}");,
                "Matches ", $spec, " number with up to", stringify!($doc), "digits\n",
                "```",
                "# use sscanf::RegexRepresentation; use std::num::*;",
                concat!("assert_eq!(", stringify!($ty), "::REGEX, r\"", $prefix, $n, "}\");"),
                "```"
            }
        })+
    };
    // Floating point types: all share the `FLOAT` regex.
    (f64; $($ty: ty),+) => {
        $(impl RegexRepresentation for $ty {
            doc_concat!{
                const REGEX: &'static str = FLOAT;,
                "Matches any floating point number",
                "",
                concat!("See [FromStr on ", stringify!($ty), "](https://doc.rust-lang.org/std/primitive.", stringify!($ty), ".html#method.from_str) for details"),
                "```",
                "# use sscanf::RegexRepresentation;",
                concat!("assert_eq!(", stringify!($ty), r#"::REGEX, r"[+-]?(?i:inf|infinity|nan|(?:\d+|\d+\.\d*|\d*\.\d+)(?:e[+-]?\d+)?)");"#),
                "```"
            }
        })+
    };
}
use std::num::*;
// Unsigned integers: an optional `+` followed by 1 to N digits, where N is the number of
// decimal digits of the type's maximum value.
impl_num! {
    "any positive", r"\+?\d{1,";
    (u8, 3),     // u8::MAX  = 255
    (u16, 5),    // u16::MAX = 65535
    (u32, 10),   // u32::MAX = 4294967295
    (u64, 20),   // u64::MAX = 18446744073709551615
    (u128, 39),  // u128::MAX has 39 digits
    (usize, 20)  // same bound as u64 -- presumably sized for 64-bit targets
}

// Unsigned non-zero integers: the leading `[1-9]` consumes the first digit, so the quantifier
// bound (second value) is one less than the documented digit count (third value).
impl_num! {
    "any positive non-zero", r"\+?[1-9]\d{0,";
    (NonZeroU8, 2, 3),
    (NonZeroU16, 4, 5),
    (NonZeroU32, 9, 10),
    (NonZeroU64, 19, 20),
    (NonZeroU128, 38, 39),
    (NonZeroUsize, 19, 20)
}

// Signed integers: an optional `-` or `+` followed by 1 to N digits.
// NOTE(review): i64::MAX (9223372036854775807) has 19 digits, yet i64/isize use the same bound
// of 20 as their unsigned counterparts; overly long input is left to `from_str` to reject.
impl_num! {
    "any", r"[-+]?\d{1,";
    (i8, 3),
    (i16, 5),
    (i32, 10),
    (i64, 20),
    (i128, 39),
    (isize, 20)
}

// Signed non-zero integers: sign, a leading `[1-9]`, then the remaining digits.
impl_num! {
    "any non-zero", r"[-+]?[1-9]\d{0,";
    (NonZeroI8, 2, 3),
    (NonZeroI16, 4, 5),
    (NonZeroI32, 9, 10),
    (NonZeroI64, 19, 20),
    (NonZeroI128, 38, 39),
    (NonZeroIsize, 19, 20)
}

// Floating point types share the `FLOAT` regex.
impl_num! { f64; f32, f64 }
impl RegexRepresentation for String {
/// Matches any non-empty sequence of Characters, taking as few of them as possible
/// (`.+?` is a lazy quantifier).
///
/// Note that this clones part of the input string, which is usually not necessary. Use
/// [`str`](#impl-RegexRepresentation-for-str) unless you explicitly need ownership.
/// ```
/// # use sscanf::RegexRepresentation;
/// assert_eq!(String::REGEX, r".+?")
/// ```
const REGEX: &'static str = r".+?";
}
impl RegexRepresentation for str {
/// Matches any non-empty sequence of Characters, taking as few of them as possible
/// (`.+?` is a lazy quantifier).
///
/// Note that this is the non-borrowed form of the usual `&str`. This is the type that should be
/// used when calling sscanf!() because of proc-macro limitations. The type returned by sscanf!()
/// is `&str` as one would expect.
///
/// This is also currently the only type that borrows part of the input string, so you need to
/// keep lifetimes in mind when using this type. If the input string doesn't live long enough,
/// use [`String`](#impl-RegexRepresentation-for-String) instead.
/// ```
/// # use sscanf::RegexRepresentation;
/// assert_eq!(str::REGEX, r".+?")
/// ```
// Same pattern as the `String` implementation.
const REGEX: &'static str = r".+?";
}
impl RegexRepresentation for char {
/// Matches a single Character.
/// ```
/// # use sscanf::RegexRepresentation;
/// assert_eq!(char::REGEX, r".")
/// ```
// NOTE(review): in the `regex` crate `.` does not match `\n` unless the `s` flag is set --
// confirm how sscanf compiles the final pattern if newlines should be accepted here.
const REGEX: &'static str = r".";
}
impl RegexRepresentation for bool {
    /// Matches `true` or `false`.
    ///
    /// The alternation is wrapped in a non-capturing group so that the pattern keeps its meaning
    /// when it is concatenated with other regexes (for example via `const_format::concatcp!`).
    /// Without the group, `|` would bind across the neighbouring parts of the combined pattern.
    /// ```
    /// # use sscanf::RegexRepresentation;
    /// assert_eq!(bool::REGEX, r"(?:true|false)")
    /// ```
    const REGEX: &'static str = r"(?:true|false)";
}
impl RegexRepresentation for std::path::PathBuf {
/// Matches any non-empty sequence of Characters.
///
/// Paths in `std` don't actually have any restrictions on what they can contain, so anything
/// is valid.
/// ```
/// # use sscanf::RegexRepresentation; use std::path::PathBuf;
/// assert_eq!(PathBuf::REGEX, r".+")
/// ```
// Unlike the lazy `.+?` used for `String` and `str`, this quantifier is greedy.
const REGEX: &'static str = r".+";
}
/// Ensures that none of the built-in `REGEX` constants introduces a capture group, as required
/// by the `RegexRepresentation` documentation.
#[test]
#[rustfmt::skip]
fn no_capture_groups() {
    /// Compiles `pattern` and asserts that it only has the implicit group for the whole match.
    fn assert_non_capturing(type_name: &str, pattern: &str) {
        let compiled = regex::Regex::new(pattern).unwrap();
        // `captures_len` counts the whole match as one group, so 1 means "no explicit groups".
        assert_eq!(compiled.captures_len(), 1, "Regex for {} >>{}<< has capture groups", type_name, pattern);
    }

    macro_rules! check_all {
        ($($ty: ty),+) => {
            $( assert_non_capturing(stringify!($ty), <$ty>::REGEX); )+
        };
    }

    // unsigned and signed integers
    check_all!(u8, u16, u32, u64, u128, usize);
    check_all!(i8, i16, i32, i64, i128, isize);
    // non-zero integers
    check_all!(NonZeroU8, NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU128, NonZeroUsize);
    check_all!(NonZeroI8, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI128, NonZeroIsize);
    // floats
    check_all!(f32, f64);
    // text-like types and bool
    check_all!(String, str, char, bool);
    check_all!(std::path::PathBuf);
}