// capitol 0.3.0
//
// Parse United States Congress legislative document citations.
// Documentation
use crate::legislation::{Chamber, CommitteeDocumentType, Congress, MeasureType};
use crate::utils::Result;
use crate::Citation;
use winnow::ascii::{alpha1, alphanumeric0, digit0, digit1};
use winnow::Parser;
use winnow::Result as WResult;

/// The raw pieces of a citation string, as split apart by `CitationParser::tokenize`.
///
/// For example `"118hr555ih"` tokenizes to prefix `118`, object `"hr"`,
/// number `555` and suffix `"ih"`.
#[derive(Debug, Default, PartialEq)]
struct CiteParts {
    /// Number that precedes the object abbreviation, if any. `CitationParser::parse`
    /// reads it as the Congress for measures, committee documents and laws, and as
    /// the volume for `stat` citations.
    prefix: Option<usize>,
    /// Lowercased alphabetic abbreviation of the cited object (e.g. `"hr"`, `"publ"`).
    object: String,
    /// Number that follows the object abbreviation.
    number: usize,
    /// Trailing alphanumeric text after the number, if any; `CitationParser::parse`
    /// uses it as the version of a measure.
    suffix: Option<String>,
}

/// Stateless namespace for the citation-parsing functions; it holds no data and
/// every function in its `impl` is an associated function.
pub(crate) struct CitationParser {}

impl CitationParser {
    fn is_measure_or_committee_document(object: &str) -> bool {
        [
            "hr", "hres", "hconres", "hjres", "s", "sres", "sconres", "sjres", "srpt", "sprt",
            "hrpt", "hprt",
        ]
        .contains(&object)
    }

    fn parse_prefix<'s>(input: &mut &'s str) -> WResult<&'s str> {
        digit0.parse_next(input)
    }

    fn parse_object<'s>(input: &mut &'s str) -> WResult<&'s str> {
        alpha1.parse_next(input)
    }

    fn parse_number(input: &mut &str) -> WResult<usize> {
        digit1.parse_to().parse_next(input)
    }

    fn parse_suffix<'s>(input: &mut &'s str) -> WResult<&'s str> {
        alphanumeric0.parse_next(input)
    }

    fn tokenize(input: &str) -> Result<CiteParts> {
        // initialize parts container
        let mut parts = CiteParts::default();

        let input = input.to_lowercase();

        if input.starts_with("public law no")
            || input.starts_with("publ")
            || input.starts_with("pl")
        {
            for part in input.split(' ') {
                if part.contains('-') {
                    let congress_and_number: Vec<&str> = part.split('-').collect();
                    return Ok(CiteParts {
                        prefix: Some(congress_and_number[0].parse::<usize>()?),
                        object: "publ".to_string(),
                        number: congress_and_number[1].parse::<usize>()?,
                        suffix: None,
                    });
                }
            }
        }

        let cleaned = input
            .to_lowercase()
            .replace(|c: char| !c.is_alphanumeric(), "");

        let mut input = cleaned.as_str();

        let (prefix, object, number, suffix) = (
            Self::parse_prefix,
            Self::parse_object,
            Self::parse_number,
            Self::parse_suffix,
        )
            .parse_next(&mut input)?;
        let prefix: Option<usize> = prefix.parse().ok();

        parts.prefix = prefix;
        parts.object = object.to_string();
        parts.number = number;
        parts.suffix = if suffix.is_empty() {
            None
        } else {
            Some(suffix.to_string())
        };

        Ok(parts)
    }

    pub(crate) fn parse(input: &str) -> Result<Citation> {
        let parts = Self::tokenize(input)?;
        let number = parts.number;

        if Self::is_measure_or_committee_document(&parts.object) {
            let chamber = Chamber::parse(&parts.object);

            let version = parts.suffix;

            Ok(match parts.object.as_str() {
                "hr" | "hres" | "hconres" | "hjres" | "s" | "sres" | "sconres" | "sjres" => {
                    let measure_type = match parts.object.as_str() {
                        "hr" | "s" => MeasureType::Bill,
                        "hres" | "sres" => MeasureType::Resolution,
                        "hconres" | "sconres" => MeasureType::ConcurrentResolution,
                        "hjres" | "sjres" => MeasureType::JointResolution,
                        _ => panic!(),
                    };
                    let congress = if let Some(num) = parts.prefix {
                        Some(Congress::parse(num)?)
                    } else {
                        None
                    };

                    Citation::Measure {
                        congress,
                        chamber,
                        number,
                        version,
                        measure_type,
                    }
                }
                _ => {
                    let congress = if let Some(num) = parts.prefix {
                        Some(Congress::parse(num)?)
                    } else {
                        None
                    };

                    let document_type = match parts.object.as_str() {
                        "hrpt" | "srpt" => CommitteeDocumentType::Report,
                        "hprt" | "sprt" => CommitteeDocumentType::Print,
                        _ => unreachable!(),
                    };
                    Citation::CommitteeDocument {
                        congress,
                        chamber,
                        document_type,
                        number,
                    }
                }
            })
        } else {
            match parts.object.as_str() {
                "stat" => Ok(Citation::Statute {
                    volume: parts.prefix.unwrap(),
                    page: number,
                }),
                "pl" | "publ" => {
                    let congress = if let Some(num) = parts.prefix {
                        Some(Congress::parse(num)?)
                    } else {
                        None
                    };

                    Ok(Citation::Law { congress, number })
                }
                _ => unreachable!(),
            }
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    /// Builds the expected [`CiteParts`] for a test case.
    fn parts(prefix: Option<usize>, object: &str, number: usize, suffix: Option<&str>) -> CiteParts {
        CiteParts {
            prefix,
            object: object.to_string(),
            number,
            suffix: suffix.map(str::to_string),
        }
    }

    #[test]
    fn test_tokenize_no_ver_house_bill() {
        let result = CitationParser::tokenize("118hr8070").unwrap();
        assert_eq!(parts(Some(118), "hr", 8070, None), result);
    }

    #[test]
    fn test_tokenize_no_ver_senate_bill() {
        let result = CitationParser::tokenize("118s5").unwrap();
        assert_eq!(parts(Some(118), "s", 5, None), result);
    }

    #[test]
    fn test_tokenize_with_ver_house_bill() {
        let result = CitationParser::tokenize("118hr555ih").unwrap();
        assert_eq!(parts(Some(118), "hr", 555, Some("ih")), result);
    }

    #[test]
    fn test_tokenize_with_ver_senate_bill() {
        let result = CitationParser::tokenize("118s17is").unwrap();
        assert_eq!(parts(Some(118), "s", 17, Some("is")), result);
    }

    #[test]
    fn tokenize_no_congress() {
        let result = CitationParser::tokenize("hr8070").unwrap();
        assert_eq!(parts(None, "hr", 8070, None), result);
    }

    #[test]
    fn tokenize_uppercase_no_congress() {
        let result = CitationParser::tokenize("HR8070").unwrap();
        assert_eq!(parts(None, "hr", 8070, None), result);
    }

    #[test]
    fn tokenize_punctuated_citation() {
        // Punctuation and spaces are stripped before tokenizing.
        let result = CitationParser::tokenize("H.R. 8070").unwrap();
        assert_eq!(parts(None, "hr", 8070, None), result);
    }

    #[test]
    fn tokenize_law_download_citation() {
        let expected = parts(Some(119), "publ", 68, None);

        let result = CitationParser::tokenize("Public Law No: 119-68").unwrap();
        assert_eq!(expected, result);

        let result = CitationParser::tokenize("Public Law No. 119-68").unwrap();
        assert_eq!(expected, result);
    }

    #[test]
    fn tokenize_short_law_citation() {
        let result = CitationParser::tokenize("PL 119-68").unwrap();
        assert_eq!(parts(Some(119), "publ", 68, None), result);
    }

    #[test]
    fn tokenize_rejects_missing_number() {
        assert!(CitationParser::tokenize("hr").is_err());
        assert!(CitationParser::tokenize("").is_err());
    }
}