use crate::Citation;
use crate::constants::{
COMMITTEE_DOCUMENT_SEQUENCES, MEASURE_SEQUENCES, PUBL_SEQUENCES, STATUTE_SEQUENCES,
};
use crate::error::Error;
use crate::legislation::{Chamber, CommitteeDocumentType, Congress, MeasureType};
use crate::utils::Result;
use winnow::Parser;
use winnow::Result as WResult;
use winnow::ascii::{alpha1, alphanumeric0, digit0, digit1};
/// Raw pieces of a citation string as produced by `CitationParser::tokenize`,
/// before they are interpreted as a concrete `Citation`.
#[derive(Debug, Default, PartialEq)]
struct CiteParts {
// Optional leading number. `CitationParser::parse` hands it to
// `Congress::parse`, and uses it as the volume of a statute citation.
prefix: Option<usize>,
// Lowercased alphabetic token naming the document type (e.g. "hr", "s", "publ").
object: String,
// Number that follows the object; used as the measure/document/law number,
// or as the page of a statute citation.
number: usize,
// Optional trailing alphanumeric text; used as the version of a measure (e.g. "ih").
suffix: Option<String>,
}
/// Stateless namespace type: its associated functions turn citation strings
/// into `Citation` values.
pub(crate) struct CitationParser {}
impl CitationParser {
fn parse_prefix<'s>(input: &mut &'s str) -> WResult<&'s str> {
digit0.parse_next(input)
}
fn parse_object<'s>(input: &mut &'s str) -> WResult<&'s str> {
alpha1.parse_next(input)
}
fn parse_number(input: &mut &str) -> WResult<usize> {
digit1.parse_to().parse_next(input)
}
fn parse_suffix<'s>(input: &mut &'s str) -> WResult<&'s str> {
alphanumeric0.parse_next(input)
}
fn tokenize(input: &str) -> Result<CiteParts> {
let mut parts = CiteParts::default();
let input = input.to_lowercase();
if PUBL_SEQUENCES.iter().any(|s| input.starts_with(s)) {
for part in input.split(' ') {
if part.contains('-') {
let congress_and_number: Vec<&str> = part.split('-').collect();
return Ok(CiteParts {
prefix: Some(congress_and_number[0].parse::<usize>()?),
object: "publ".to_string(),
number: congress_and_number[1].parse::<usize>()?,
suffix: None,
});
}
}
}
let cleaned = input
.to_lowercase()
.replace(|c: char| !c.is_alphanumeric(), "");
let mut input = cleaned.as_str();
let (prefix, object, number, suffix) = (
Self::parse_prefix,
Self::parse_object,
Self::parse_number,
Self::parse_suffix,
)
.parse_next(&mut input)?;
let prefix: Option<usize> = prefix.parse().ok();
parts.prefix = prefix;
parts.object = object.to_string();
parts.number = number;
parts.suffix = if suffix.is_empty() {
None
} else {
Some(suffix.to_string())
};
Ok(parts)
}
pub(crate) fn parse(input: &str) -> Result<Citation> {
let parts = Self::tokenize(input)?;
let chamber = Chamber::parse(&parts.object);
let document_type = parts.object.as_str();
let congress = if let Some(num) = parts.prefix {
Congress::parse(num, chamber.as_ref(), document_type).ok()
} else {
None
};
let number = parts.number;
if MEASURE_SEQUENCES.contains(&document_type) {
let version = parts.suffix;
let measure_type =
MeasureType::parse(document_type).ok_or(Error::UnknownLegislativeDocumentType)?;
Ok(Citation::Measure {
congress,
chamber: chamber.unwrap(),
number,
version,
measure_type,
})
} else if COMMITTEE_DOCUMENT_SEQUENCES.contains(&document_type) {
let document_type = CommitteeDocumentType::parse(document_type)
.ok_or(Error::UnknownLegislativeDocumentType)?;
Ok(Citation::CommitteeDocument {
congress,
chamber: chamber.unwrap(),
document_type,
number,
})
} else if PUBL_SEQUENCES.contains(&document_type) {
Ok(Citation::Law { congress, number })
} else if STATUTE_SEQUENCES.contains(&document_type) {
Ok(Citation::Statute {
volume: parts.prefix.unwrap(),
page: number,
})
} else {
Err(Error::UnknownLegislativeDocumentType)
}
}
}
#[cfg(test)]
mod test {
    use super::*;

    /// Builds the expected [`CiteParts`] for a tokenizer test case.
    fn parts(
        prefix: Option<usize>,
        object: &str,
        number: usize,
        suffix: Option<&str>,
    ) -> CiteParts {
        CiteParts {
            prefix,
            object: String::from(object),
            number,
            suffix: suffix.map(String::from),
        }
    }

    // `tokenize` takes `&str`, so inputs are passed directly rather than
    // through a needless `&mut` borrow of a `mut` binding.

    /// Congress + house bill, no version suffix.
    #[test]
    fn test_tokenize_no_ver_house_bill() {
        let expected = parts(Some(118), "hr", 8070, None);
        let result = CitationParser::tokenize("118hr8070").unwrap();
        assert_eq!(expected, result);
    }

    /// Congress + senate bill, no version suffix.
    #[test]
    fn test_tokenize_no_ver_senate_bill() {
        let expected = parts(Some(118), "s", 5, None);
        let result = CitationParser::tokenize("118s5").unwrap();
        assert_eq!(expected, result);
    }

    /// Congress + house bill with a version suffix.
    #[test]
    fn test_tokenize_with_ver_house_bill() {
        let expected = parts(Some(118), "hr", 555, Some("ih"));
        let result = CitationParser::tokenize("118hr555ih").unwrap();
        assert_eq!(expected, result);
    }

    /// Congress + senate bill with a version suffix.
    #[test]
    fn test_tokenize_with_ver_senate_bill() {
        let expected = parts(Some(118), "s", 17, Some("is"));
        let result = CitationParser::tokenize("118s17is").unwrap();
        assert_eq!(expected, result);
    }

    /// A citation without a leading congress yields no prefix.
    #[test]
    fn tokenize_no_congress() {
        let expected = parts(None, "hr", 8070, None);
        let result = CitationParser::tokenize("hr8070").unwrap();
        assert_eq!(expected, result);
    }

    /// Uppercase input is lowercased before tokenizing.
    #[test]
    fn tokenize_uppercase_no_congress() {
        let expected = parts(None, "hr", 8070, None);
        let result = CitationParser::tokenize("HR8070").unwrap();
        assert_eq!(expected, result);
    }

    /// Long-form public-law citations tokenize to congress + law number,
    /// whichever punctuation follows "No".
    #[test]
    fn tokenize_law_download_citation() {
        let expected = parts(Some(119), "publ", 68, None);
        for input in ["Public Law No: 119-68", "Public Law No. 119-68"] {
            let result = CitationParser::tokenize(input).unwrap();
            assert_eq!(expected, result);
        }
    }
}