use crate::legislation::{Chamber, CommitteeDocumentType, Congress, MeasureType};
use crate::utils::Result;
use crate::Citation;
use winnow::ascii::{alpha1, alphanumeric0, digit0, digit1};
use winnow::Parser;
use winnow::Result as WResult;
/// The raw pieces of a citation, as produced by `CitationParser::tokenize`.
///
/// For an input such as `118hr555ih` the pieces are `118` (prefix), `hr`
/// (object), `555` (number) and `ih` (suffix).
#[derive(Debug, Default, PartialEq)]
struct CiteParts {
    /// Number written before the object code, if any. The parser reads it as
    /// the congress, or as the volume for `stat` citations.
    prefix: Option<usize>,
    /// Lowercased alphabetic code naming the kind of document (`hr`, `publ`, ...).
    object: String,
    /// Number written after the object code.
    number: usize,
    /// Trailing alphanumeric text after the number, if any (e.g. `ih`).
    suffix: Option<String>,
}
/// Namespace for the citation parsing routines; it carries no state.
pub(crate) struct CitationParser;
impl CitationParser {
    /// Returns `true` when `object` is the code of a bill, a resolution, a
    /// committee report or a committee print.
    fn is_measure_or_committee_document(object: &str) -> bool {
        matches!(
            object,
            "hr" | "hres"
                | "hconres"
                | "hjres"
                | "s"
                | "sres"
                | "sconres"
                | "sjres"
                | "srpt"
                | "sprt"
                | "hrpt"
                | "hprt"
        )
    }

    /// Returns `true` for every object code that [`CitationParser::parse`]
    /// knows how to turn into a [`Citation`].
    fn is_supported_object(object: &str) -> bool {
        Self::is_measure_or_committee_document(object) || matches!(object, "stat" | "pl" | "publ")
    }

    /// Consumes the (possibly empty) run of digits that precedes the object code.
    fn parse_prefix<'s>(input: &mut &'s str) -> WResult<&'s str> {
        digit0.parse_next(input)
    }

    /// Consumes the alphabetic object code and rejects codes that are not
    /// supported, so unknown citation types surface as an ordinary parse
    /// error instead of reaching [`CitationParser::parse`].
    fn parse_object<'s>(input: &mut &'s str) -> WResult<&'s str> {
        alpha1
            .verify(|object: &str| Self::is_supported_object(object))
            .parse_next(input)
    }

    /// Consumes a non-empty run of digits and converts it to a number.
    fn parse_number(input: &mut &str) -> WResult<usize> {
        digit1.parse_to().parse_next(input)
    }

    /// Consumes whatever alphanumeric text follows the number (may be empty).
    fn parse_suffix<'s>(input: &mut &'s str) -> WResult<&'s str> {
        alphanumeric0.parse_next(input)
    }

    /// Splits a citation into its prefix, object, number and suffix.
    ///
    /// Matching is case-insensitive. Public laws written with a dashed
    /// `<congress>-<number>` pair (e.g. `Public Law No: 119-68`) are handled
    /// first; everything else is stripped of non-alphanumeric characters and
    /// parsed as `[prefix] object number [suffix]`.
    ///
    /// # Errors
    ///
    /// Returns an error when a numeric field cannot be parsed, when the
    /// object code is not supported, or when a `stat` citation has no volume
    /// in front of it.
    fn tokenize(input: &str) -> Result<CiteParts> {
        let input = input.to_lowercase();

        // `"publ"` also covers the long "public law no" spelling.
        if input.starts_with("publ") || input.starts_with("pl") {
            // The first space-separated word containing a dash holds the
            // "<congress>-<number>" pair.
            let dashed = input.split(' ').find_map(|part| part.split_once('-'));
            if let Some((congress, rest)) = dashed {
                // Only the first two dash-separated fields are significant.
                let number = rest.split('-').next().unwrap_or(rest);
                return Ok(CiteParts {
                    prefix: Some(congress.parse::<usize>()?),
                    object: "publ".to_string(),
                    number: number.parse::<usize>()?,
                    suffix: None,
                });
            }
        }

        // `input` is already lowercase; drop spaces and punctuation so that
        // "118 H.R. 8070" and "118hr8070" tokenize identically.
        let cleaned: String = input.chars().filter(|c| c.is_alphanumeric()).collect();
        let mut remaining = cleaned.as_str();
        let (prefix, object, number, suffix) = (
            Self::parse_prefix,
            Self::parse_object,
            Self::parse_number,
            Self::parse_suffix,
        )
            .parse_next(&mut remaining)?;

        // The prefix may be omitted for everything except `stat`, whose
        // volume is mandatory. Running the strict digit parser over the
        // prefix turns a missing volume (and a prefix too large for `usize`)
        // into a parse error rather than a silently dropped value.
        let prefix = if prefix.is_empty() && object != "stat" {
            None
        } else {
            let mut digits = prefix;
            Some(Self::parse_number(&mut digits)?)
        };

        Ok(CiteParts {
            prefix,
            object: object.to_string(),
            number,
            suffix: (!suffix.is_empty()).then(|| suffix.to_string()),
        })
    }

    /// Parses a textual citation into a [`Citation`].
    ///
    /// Supported object codes are bills and resolutions (`hr`, `s`, `hres`,
    /// `sres`, `hconres`, `sconres`, `hjres`, `sjres`), committee reports and
    /// prints (`hrpt`, `srpt`, `hprt`, `sprt`), statutes (`stat`) and public
    /// laws (`pl`, `publ`).
    ///
    /// # Errors
    ///
    /// Returns an error when the input cannot be tokenized (see
    /// [`CitationParser::tokenize`]) or when the congress number is rejected
    /// by `Congress::parse`.
    pub(crate) fn parse(input: &str) -> Result<Citation> {
        let parts = Self::tokenize(input)?;
        let number = parts.number;

        if parts.object == "stat" {
            // For statutes the prefix is a volume, not a congress.
            let volume = parts
                .prefix
                .expect("tokenize() rejects statute citations without a volume");
            return Ok(Citation::Statute {
                volume,
                page: number,
            });
        }

        // Every other citation type reads the optional prefix as a congress.
        let congress = match parts.prefix {
            Some(num) => Some(Congress::parse(num)?),
            None => None,
        };

        Ok(match parts.object.as_str() {
            "pl" | "publ" => Citation::Law { congress, number },
            object @ ("hrpt" | "srpt" | "hprt" | "sprt") => {
                let document_type = if matches!(object, "hrpt" | "srpt") {
                    CommitteeDocumentType::Report
                } else {
                    CommitteeDocumentType::Print
                };
                Citation::CommitteeDocument {
                    congress,
                    chamber: Chamber::parse(&parts.object),
                    document_type,
                    number,
                }
            }
            object => {
                let measure_type = match object {
                    "hr" | "s" => MeasureType::Bill,
                    "hres" | "sres" => MeasureType::Resolution,
                    "hconres" | "sconres" => MeasureType::ConcurrentResolution,
                    "hjres" | "sjres" => MeasureType::JointResolution,
                    other => unreachable!(
                        "tokenize() only yields supported citation types, got {other:?}"
                    ),
                };
                Citation::Measure {
                    congress,
                    chamber: Chamber::parse(&parts.object),
                    number,
                    version: parts.suffix,
                    measure_type,
                }
            }
        })
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Builds the tokenizer output a test case expects.
    fn cite(
        prefix: Option<usize>,
        object: &str,
        number: usize,
        suffix: Option<&str>,
    ) -> CiteParts {
        CiteParts {
            prefix,
            object: object.to_owned(),
            number,
            suffix: suffix.map(str::to_owned),
        }
    }

    /// Tokenizes `input`, failing the test if the tokenizer returns an error.
    fn tokenized(input: &str) -> CiteParts {
        CitationParser::tokenize(input).unwrap()
    }

    // Congress, house bill, no version suffix.
    #[test]
    fn test_tokenize_no_ver_house_bill() {
        assert_eq!(cite(Some(118), "hr", 8070, None), tokenized("118hr8070"));
    }

    // Congress, senate bill, no version suffix.
    #[test]
    fn test_tokenize_no_ver_senate_bill() {
        assert_eq!(cite(Some(118), "s", 5, None), tokenized("118s5"));
    }

    // Congress, house bill, with a version suffix.
    #[test]
    fn test_tokenize_with_ver_house_bill() {
        assert_eq!(
            cite(Some(118), "hr", 555, Some("ih")),
            tokenized("118hr555ih")
        );
    }

    // Congress, senate bill, with a version suffix.
    #[test]
    fn test_tokenize_with_ver_senate_bill() {
        assert_eq!(cite(Some(118), "s", 17, Some("is")), tokenized("118s17is"));
    }

    // A citation without a leading congress leaves the prefix empty.
    #[test]
    fn tokenize_no_congress() {
        assert_eq!(cite(None, "hr", 8070, None), tokenized("hr8070"));
    }

    // Object codes are matched case-insensitively and reported in lowercase.
    #[test]
    fn tokenize_uppercase_no_congress() {
        assert_eq!(cite(None, "hr", 8070, None), tokenized("HR8070"));
    }

    // Both punctuation variants of the long public-law form are accepted.
    #[test]
    fn tokenize_law_download_citation() {
        let expected = cite(Some(119), "publ", 68, None);
        assert_eq!(expected, tokenized("Public Law No: 119-68"));
        assert_eq!(expected, tokenized("Public Law No. 119-68"));
    }
}