use super::humanregex::HumanRegex;
/// Matches a single arbitrary character.
///
/// Emits the regex wildcard `.`. In most regex flavours (including the
/// `regex` crate with its default flags) the wildcard does not match a
/// line feed.
pub fn any() -> HumanRegex {
    let pattern = String::from(r".");
    HumanRegex(pattern)
}
/// Matches a single digit character.
///
/// Emits the `\d` shorthand class.
pub fn digit() -> HumanRegex {
    let pattern = String::from(r"\d");
    HumanRegex(pattern)
}
/// Matches a single character that is not a digit.
///
/// Emits the `\D` shorthand class, the complement of `\d`.
pub fn non_digit() -> HumanRegex {
    let pattern = String::from(r"\D");
    HumanRegex(pattern)
}
/// Matches a single "word" character.
///
/// Emits the `\w` shorthand class.
pub fn word() -> HumanRegex {
    let pattern = String::from(r"\w");
    HumanRegex(pattern)
}
/// Matches a single character that is not a "word" character.
///
/// Emits the `\W` shorthand class, the complement of `\w`.
pub fn non_word() -> HumanRegex {
    let pattern = String::from(r"\W");
    HumanRegex(pattern)
}
/// Matches a single whitespace character.
///
/// Emits the `\s` shorthand class.
pub fn whitespace() -> HumanRegex {
    let pattern = String::from(r"\s");
    HumanRegex(pattern)
}
/// Matches a single character that is not whitespace.
///
/// Emits the `\S` shorthand class, the complement of `\s`.
pub fn non_whitespace() -> HumanRegex {
    let pattern = String::from(r"\S");
    HumanRegex(pattern)
}
/// Renders one endpoint of a bracketed character class, backslash-escaping
/// the characters that carry special meaning inside `[...]`.
///
/// Without this, an endpoint such as `^`, `]` or `\` would change the meaning
/// of the class (or make it unparsable) instead of standing for itself.
fn escape_class_char(c: char) -> String {
    match c {
        '\\' | '[' | ']' | '^' | '-' | '&' | '~' => format!("\\{}", c),
        _ => c.to_string(),
    }
}

/// Matches a single character inside the inclusive `range`.
///
/// Produces a bracketed class of the form `[start-end]`. Endpoints that are
/// class metacharacters (`\`, `[`, `]`, `^`, `-`, `&`, `~`) are escaped so
/// they are always treated as literal characters.
pub fn within(range: std::ops::RangeInclusive<char>) -> HumanRegex {
    HumanRegex(format!(
        "[{}-{}]",
        escape_class_char(*range.start()),
        escape_class_char(*range.end())
    ))
}

/// Matches a single character outside the inclusive `range`.
///
/// Produces a negated bracketed class of the form `[^start-end]`. Endpoints
/// that are class metacharacters (`\`, `[`, `]`, `^`, `-`, `&`, `~`) are
/// escaped so they are always treated as literal characters.
pub fn without(range: std::ops::RangeInclusive<char>) -> HumanRegex {
    HumanRegex(format!(
        "[^{}-{}]",
        escape_class_char(*range.start()),
        escape_class_char(*range.end())
    ))
}
/// Unicode general categories that can be passed to [`unicode_category`]
/// and [`non_unicode_category`].
// The variant names are self-describing, so per-variant docs are omitted.
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UnicodeCategory {
    Letter,
    LowercaseLetter,
    UppercaseLetter,
    TitlecaseLetter,
    CasedLetter,
    ModifierLetter,
    OtherLetter,
    Mark,
    NonSpacingMark,
    SpaceCombiningMark,
    EnclosingMark,
    Separator,
    SpaceSeparator,
    LineSeparator,
    ParagraphSeparator,
    Symbol,
    MathSymbol,
    CurrencySymbol,
    ModifierSymbol,
    OtherSymbol,
    Number,
    DecimalDigitNumber,
    LetterNumber,
    OtherNumber,
    Punctuation,
    DashPunctuation,
    OpenPunctuation,
    ClosePunctuation,
    InitialPunctuation,
    FinalPunctuation,
    ConnectorPunctuation,
    OtherPunctuation,
    Other,
    Control,
    Format,
    PrivateUse,
    Surrogate,
    Unassigned,
}
/// Matches a single character belonging to the given Unicode general
/// category.
///
/// The returned pattern has the form `\p{Name}`, where `Name` is the
/// property-value name associated with `category`. For the complemented
/// class see [`non_unicode_category`].
pub fn unicode_category(category: UnicodeCategory) -> HumanRegex {
    // Resolve the variant to its property-value name; the `\p{...}` wrapper
    // is applied once below instead of being repeated in every arm.
    let name = match category {
        UnicodeCategory::Letter => "Letter",
        UnicodeCategory::LowercaseLetter => "Lowercase_Letter",
        UnicodeCategory::UppercaseLetter => "Uppercase_Letter",
        UnicodeCategory::TitlecaseLetter => "Titlecase_Letter",
        UnicodeCategory::CasedLetter => "Cased_Letter",
        UnicodeCategory::ModifierLetter => "Modifier_Letter",
        UnicodeCategory::OtherLetter => "Other_Letter",
        UnicodeCategory::Mark => "Mark",
        UnicodeCategory::NonSpacingMark => "NonSpacing_Mark",
        // The general category `Mc` is named `Spacing_Mark`; the previous
        // `SpaceCombining_Mark` is not a recognised property value.
        UnicodeCategory::SpaceCombiningMark => "Spacing_Mark",
        UnicodeCategory::EnclosingMark => "Enclosing_Mark",
        UnicodeCategory::Separator => "Separator",
        UnicodeCategory::SpaceSeparator => "Space_Separator",
        UnicodeCategory::LineSeparator => "Line_Separator",
        UnicodeCategory::ParagraphSeparator => "Paragraph_Separator",
        UnicodeCategory::Symbol => "Symbol",
        UnicodeCategory::MathSymbol => "Math_Symbol",
        UnicodeCategory::CurrencySymbol => "Currency_Symbol",
        UnicodeCategory::ModifierSymbol => "Modifier_Symbol",
        UnicodeCategory::OtherSymbol => "Other_Symbol",
        UnicodeCategory::Number => "Number",
        // The general category `Nd` is named `Decimal_Number`; the previous
        // `DecimalDigit_Number` is not a recognised property value.
        UnicodeCategory::DecimalDigitNumber => "Decimal_Number",
        UnicodeCategory::LetterNumber => "Letter_Number",
        UnicodeCategory::OtherNumber => "Other_Number",
        UnicodeCategory::Punctuation => "Punctuation",
        UnicodeCategory::DashPunctuation => "Dash_Punctuation",
        UnicodeCategory::OpenPunctuation => "Open_Punctuation",
        UnicodeCategory::ClosePunctuation => "Close_Punctuation",
        UnicodeCategory::InitialPunctuation => "Initial_Punctuation",
        UnicodeCategory::FinalPunctuation => "Final_Punctuation",
        UnicodeCategory::ConnectorPunctuation => "Connector_Punctuation",
        UnicodeCategory::OtherPunctuation => "Other_Punctuation",
        UnicodeCategory::Other => "Other",
        UnicodeCategory::Control => "Control",
        UnicodeCategory::Format => "Format",
        UnicodeCategory::PrivateUse => "Private_Use",
        // NOTE(review): the `regex` crate may not ship a table for the
        // Surrogate category (a `&str` cannot contain surrogates) - confirm
        // that this pattern compiles with the engine in use.
        UnicodeCategory::Surrogate => "Surrogate",
        UnicodeCategory::Unassigned => "Unassigned",
    };
    HumanRegex(format!(r"\p{{{}}}", name))
}
/// Matches a single character that does NOT belong to the given Unicode
/// general category.
///
/// Builds the pattern for `category` with [`unicode_category`], renders it to
/// text and rewrites every `\p` into `\P`, which turns the class into its
/// complement.
pub fn non_unicode_category(category: UnicodeCategory) -> HumanRegex {
    let positive = unicode_category(category).to_string();
    let negated = positive.replace(r"\p", r"\P");
    HumanRegex(negated)
}
/// Unicode scripts that can be passed to [`unicode_script`] and
/// [`non_unicode_script`].
///
/// `CandianAboriginal`, `Gurkmukhi` and `Hirigana` are misspellings of the
/// Canadian Aboriginal, Gurmukhi and Hiragana scripts; the identifiers are
/// kept as they are because renaming them would break existing callers.
// The variant names are self-describing, so per-variant docs are omitted.
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UnicodeScript {
    Common,
    Arabic,
    Armenian,
    Bengali,
    Bopomofo,
    Braille,
    Buhid,
    CandianAboriginal,
    Cherokee,
    Cyrillic,
    Devanagari,
    Ethiopic,
    Georgian,
    Greek,
    Gujarati,
    Gurkmukhi,
    Han,
    Hangul,
    Hanunoo,
    Hebrew,
    Hirigana,
    Inherited,
    Kannada,
    Katakana,
    Khmer,
    Lao,
    Latin,
    Limbu,
    Malayalam,
    Mongolian,
    Myanmar,
    Ogham,
    Oriya,
    Runic,
    Sinhala,
    Syriac,
    Tagalog,
    Tagbanwa,
    TaiLe,
    Tamil,
    Telugu,
    Thaana,
    Thai,
    Tibetan,
    Yi,
}
/// Matches a single character belonging to the given Unicode script.
///
/// The returned pattern has the form `\p{Name}`, where `Name` is the script
/// name associated with `category`. For the complemented class see
/// [`non_unicode_script`].
pub fn unicode_script(category: UnicodeScript) -> HumanRegex {
    // Resolve the variant to its script name; the `\p{...}` wrapper is
    // applied once below instead of being repeated in every arm.
    let name = match category {
        UnicodeScript::Common => "Common",
        UnicodeScript::Arabic => "Arabic",
        UnicodeScript::Armenian => "Armenian",
        UnicodeScript::Bengali => "Bengali",
        UnicodeScript::Bopomofo => "Bopomofo",
        UnicodeScript::Braille => "Braille",
        UnicodeScript::Buhid => "Buhid",
        // The variant identifier is misspelled; emit the real script name so
        // the resulting pattern is valid.
        UnicodeScript::CandianAboriginal => "Canadian_Aboriginal",
        UnicodeScript::Cherokee => "Cherokee",
        UnicodeScript::Cyrillic => "Cyrillic",
        UnicodeScript::Devanagari => "Devanagari",
        UnicodeScript::Ethiopic => "Ethiopic",
        UnicodeScript::Georgian => "Georgian",
        UnicodeScript::Greek => "Greek",
        UnicodeScript::Gujarati => "Gujarati",
        // Misspelled identifier; the script is named `Gurmukhi`.
        UnicodeScript::Gurkmukhi => "Gurmukhi",
        UnicodeScript::Han => "Han",
        UnicodeScript::Hangul => "Hangul",
        UnicodeScript::Hanunoo => "Hanunoo",
        UnicodeScript::Hebrew => "Hebrew",
        // Misspelled identifier; the script is named `Hiragana`.
        UnicodeScript::Hirigana => "Hiragana",
        UnicodeScript::Inherited => "Inherited",
        UnicodeScript::Kannada => "Kannada",
        UnicodeScript::Katakana => "Katakana",
        UnicodeScript::Khmer => "Khmer",
        UnicodeScript::Lao => "Lao",
        UnicodeScript::Latin => "Latin",
        UnicodeScript::Limbu => "Limbu",
        UnicodeScript::Malayalam => "Malayalam",
        UnicodeScript::Mongolian => "Mongolian",
        UnicodeScript::Myanmar => "Myanmar",
        UnicodeScript::Ogham => "Ogham",
        UnicodeScript::Oriya => "Oriya",
        UnicodeScript::Runic => "Runic",
        UnicodeScript::Sinhala => "Sinhala",
        UnicodeScript::Syriac => "Syriac",
        UnicodeScript::Tagalog => "Tagalog",
        UnicodeScript::Tagbanwa => "Tagbanwa",
        // NOTE(review): the canonical spelling is `Tai_Le`; this presumably
        // still resolves through loose property-name matching - confirm.
        UnicodeScript::TaiLe => "TaiLe",
        UnicodeScript::Tamil => "Tamil",
        UnicodeScript::Telugu => "Telugu",
        UnicodeScript::Thaana => "Thaana",
        UnicodeScript::Thai => "Thai",
        UnicodeScript::Tibetan => "Tibetan",
        UnicodeScript::Yi => "Yi",
    };
    HumanRegex(format!(r"\p{{{}}}", name))
}
/// Matches a single character that does NOT belong to the given Unicode
/// script.
///
/// Builds the pattern for `category` with [`unicode_script`], renders it to
/// text and rewrites every `\p` into `\P`, which turns the class into its
/// complement.
pub fn non_unicode_script(category: UnicodeScript) -> HumanRegex {
    let positive = unicode_script(category).to_string();
    let negated = positive.replace(r"\p", r"\P");
    HumanRegex(negated)
}