use const_format::formatcp;
use reporters_db::regexes::ResolvedRegex;
/// Alternation matching lowercase Roman numerals for 1 through 199, with the
/// exception of `v` (5), `l` (50) and `c` (100), which no branch accepts.
///
/// The alternation is not wrapped in a group of its own, so it has to be
/// parenthesised when embedded in a larger pattern (as `PAGE_NUMBER_REGEX`
/// does).
pub const ROMAN_NUMERAL_REGEX: &str = formatcp!(
    "{tens_first}|{units_first}|{leftovers}",
    // 10-199, skipping 50-59, 100-109 and 150-159.
    tens_first = r"c?(?:xc|xl|l?x{1,3})(?:ix|iv|v?i{0,3})",
    // 1-9, 51-59, 101-109 and 151-159, skipping 5, 55, 105 and 155.
    units_first = r"(?:c?l?)(?:ix|iv|v?i{1,3})",
    // 55, 105, 150 and 155.
    leftovers = r"(?:lv|cv|cl|clv)",
);
/// Non-capturing group matching a page number: either a run of decimal digits
/// or one of the Roman numerals accepted by `ROMAN_NUMERAL_REGEX`.
pub const PAGE_NUMBER_REGEX: &str = formatcp!(r"(?:\d+|{roman})", roman = ROMAN_NUMERAL_REGEX);
/// `PAGE_NUMBER_REGEX` wrapped in a capture group named `page`.
pub const PAGE_REGEX: &str = formatcp!("(?P<page>{number})", number = PAGE_NUMBER_REGEX);
/// Builds, at compile time, a pattern that only matches the given pattern when
/// it is delimited on each side by whitespace or by the start/end of the text.
///
/// The given pattern is placed inside a new unnamed capture group; the
/// delimiters themselves sit in non-capturing groups and are consumed by the
/// match. The argument is forwarded to `formatcp!`, so it must be an
/// expression that macro accepts.
macro_rules! space_boundaries_re {
    ($pattern:expr) => {
        formatcp!(r"(?:^|\s)({inner})(?:\s|$)", inner = $pattern)
    };
}
/// Builds, at compile time, a pattern that matches the given pattern with an
/// optional run of punctuation (`PUNCTUATION_REGEX`) directly before and after
/// it.
///
/// No grouping is added around the argument, so an argument containing a
/// top-level alternation should bring its own parentheses.
macro_rules! strip_punctuation_re {
    ($pattern:expr) => {
        formatcp!("{}{}{}", PUNCTUATION_REGEX, $pattern, PUNCTUATION_REGEX)
    };
}
/// Matches `id.` (optionally followed by a comma) or `ibid.` when the token is
/// delimited by whitespace or by the start/end of the text.
///
/// The token itself is captured by the unnamed group that
/// `space_boundaries_re!` adds.
pub const ID_REGEX: &str = space_boundaries_re!(r"id\.,?|ibid\.");
/// Matches the word `supra`, optionally wrapped in punctuation
/// (`PUNCTUATION_REGEX`), when the whole token is delimited by whitespace or by
/// the start/end of the text.
pub const SUPRA_REGEX: &str = space_boundaries_re!(strip_punctuation_re!("supra"));
/// Captures a single newline character (`\n`) in an unnamed group.
pub const PARAGRAPH_REGEX: &str = r"(\n)";
/// Joins a bracketed, comma-separated list of literals with a separator
/// literal, producing a `&'static str` at compile time.
///
/// `join_with!("|", ["a", "b", "c"])` expands to `"a|b|c"`. A trailing comma
/// after the last element is accepted for lists of any length, including a
/// single element. An empty list is not accepted.
macro_rules! join_with {
    // Last (or only) element: nothing left to separate.
    ($sep:literal, [ $last:literal $(,)? ]) => {
        $last
    };
    // Peel off the head, then recurse on the remaining elements. `concat!`
    // expands the nested invocation before concatenating.
    ($sep:literal, [ $head:literal, $($tail:literal),+ $(,)? ]) => {
        concat!($head, $sep, join_with!($sep, [ $($tail),+ ]))
    };
}
/// The stop words joined with `|`, ready to be dropped into a regex
/// alternation.
///
/// The entries are spelled out a second time in `STOP_WORDS`; the two lists
/// must be kept in sync by hand.
pub const STOP_WORDS_JOINED: &str = join_with!(
    "|",
    [
        "v",
        "re",
        "parte",
        "denied",
        "citing",
        "aff'd",
        "affirmed",
        "remanded",
        "see",
        "granted",
        "dismissed",
    ]
);
/// The stop words as individual strings.
///
/// The same eleven entries, in the same order, are spelled out in
/// `STOP_WORDS_JOINED`; the two lists must be kept in sync by hand.
pub const STOP_WORDS: [&str; 11] = [
    "v",
    "re",
    "parte",
    "denied",
    "citing",
    "aff'd",
    "affirmed",
    "remanded",
    "see",
    "granted",
    "dismissed",
];
/// Matches any one of the stop words in `STOP_WORDS_JOINED`, optionally wrapped
/// in punctuation, when the whole token is delimited by whitespace or by the
/// start/end of the text.
///
/// The bare word (without surrounding punctuation) is available through the
/// capture group named `stop_word`.
pub const STOP_WORD_REGEX: &str = space_boundaries_re!(strip_punctuation_re!(formatcp!(
    r"(?P<stop_word>{words})",
    words = STOP_WORDS_JOINED
)));
/// Captures a run of non-whitespace characters that contains a section sign
/// (`\u{00A7}`).
///
/// The sign is written as a Unicode escape rather than a literal character so
/// that the pattern survives the file being re-saved or read with the wrong
/// text encoding.
pub const SECTION_REGEX: &str = "(\\S*\u{00A7}\\S*)";
/// Matches a possibly empty run of characters that are neither whitespace nor
/// ASCII letters or digits.
pub const PUNCTUATION_REGEX: &str = r"[^\sa-zA-Z0-9]*";
/// Wraps `regex` so that it only matches when delimited on each side by a
/// character that is not an ASCII letter or digit, or by the start/end of the
/// text.
///
/// The original pattern ends up inside a new unnamed capture group. The
/// delimiting characters sit in non-capturing groups and are consumed by the
/// match.
pub fn nonalphanum_boundaries_re(regex: &ResolvedRegex) -> ResolvedRegex {
    let inner = regex.value();
    let bounded = format!(r"(?:^|[^a-zA-Z0-9])({})(?:[^a-zA-Z0-9]|$)", inner);
    ResolvedRegex::of(bounded)
}
/// Converts a full-citation regex into its short-citation form.
///
/// Every place where a `(?P<reporter>...)` group is followed by an optional
/// literal `,?`, one space and the opening of a `(?P<page>` group is rewritten
/// to `(?P<reporter>...),? at (?P<page>`. Input that does not contain that
/// sequence comes back unchanged.
// The verbose pattern below uses `\ ` (an escaped space), which is only valid
// once `ignore_whitespace` is switched on through the builder; presumably the
// lint cannot see that setting, hence the allow.
#[allow(clippy::invalid_regex)]
pub fn short_cite_re(regex: &str) -> ResolvedRegex {
    // The rewrite pattern never changes, so compile it once and reuse it on
    // every call instead of rebuilding it each time.
    static REPORTER_THEN_PAGE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
    let pattern = REPORTER_THEN_PAGE.get_or_init(|| {
        regex::RegexBuilder::new(
            r#"# reporter group:
(
\(\?P<reporter>[^)]+\)
)
(?:,\?)?\ # comma and space
# page group:
(
\(\?P<page>
)"#,
        )
        .ignore_whitespace(true)
        .build()
        .expect("short-cite rewrite pattern is a valid verbose-mode regex")
    });
    // `$1` is the reporter group and `$2` the start of the page group.
    // `into_owned` reuses the replaced buffer when one was allocated.
    ResolvedRegex::of(pattern.replace_all(regex, r"$1,? at $2").into_owned())
}